├── r3 ├── __init__.py ├── app │ ├── __init__.py │ ├── handlers │ │ ├── healthcheck.py │ │ ├── __init__.py │ │ ├── index.py │ │ └── stream.py │ ├── keys.py │ ├── config.py │ ├── utils.py │ ├── app.py │ ├── server.py │ └── templates │ │ └── index.html ├── web │ ├── __init__.py │ ├── static │ │ ├── js │ │ │ ├── tabs.js │ │ │ ├── progress.js │ │ │ └── bootstrap.min.js │ │ ├── img │ │ │ └── logo.png │ │ └── css │ │ │ ├── reset.css │ │ │ ├── progress.css │ │ │ ├── style.css │ │ │ └── bootstrap.min.css │ ├── config.py │ ├── templates │ │ ├── show_key.html │ │ ├── mappers.html │ │ ├── job-types.html │ │ ├── failed.html │ │ ├── master.html │ │ ├── index.html │ │ └── stats.html │ ├── extensions.py │ ├── server.py │ └── app.py ├── worker │ ├── __init__.py │ └── mapper.py └── version.py ├── test ├── __init__.py ├── test_sync.py ├── chekhov.txt ├── app_config.py ├── count_words_reducer.py ├── count_words_stream.py ├── count_words_mapper.py ├── test_count_words.py └── small-chekhov.txt ├── r3.png ├── r3-web-1.jpg ├── r3-web-2.jpg ├── r3-web-3.jpg ├── r3-web-4.jpg ├── requirements.txt ├── MANIFEST.in ├── .gitignore ├── Makefile ├── setup.py ├── diagramly-r3.xml ├── README.md └── redis.conf /r3/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /r3/app/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /r3/web/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_sync.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /r3/worker/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /r3/web/static/js/tabs.js: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /r3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heynemann/r3/HEAD/r3.png -------------------------------------------------------------------------------- /r3-web-1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heynemann/r3/HEAD/r3-web-1.jpg -------------------------------------------------------------------------------- /r3-web-2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heynemann/r3/HEAD/r3-web-2.jpg -------------------------------------------------------------------------------- /r3-web-3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heynemann/r3/HEAD/r3-web-3.jpg -------------------------------------------------------------------------------- /r3-web-4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heynemann/r3/HEAD/r3-web-4.jpg -------------------------------------------------------------------------------- /test/chekhov.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heynemann/r3/HEAD/test/chekhov.txt 
-------------------------------------------------------------------------------- /r3/web/static/img/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heynemann/r3/HEAD/r3/web/static/img/logo.png -------------------------------------------------------------------------------- /r3/version.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | __version__ = '0.2.0' 5 | version = __version__ 6 | VERSION = __version__ 7 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | redis 2 | tornado-redis 3 | http://gevent.googlecode.com/files/gevent-1.0b2.tar.gz 4 | tornado-pyvows 5 | ujson 6 | flask 7 | argparse 8 | -------------------------------------------------------------------------------- /test/app_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | INPUT_STREAMS = [ 5 | 'test.count_words_stream.CountWordsStream' 6 | ] 7 | 8 | REDUCERS = [ 9 | 'test.count_words_reducer.CountWordsReducer' 10 | ] 11 | -------------------------------------------------------------------------------- /r3/app/handlers/healthcheck.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | from r3.app.handlers import BaseHandler 5 | 6 | class HealthcheckHandler(BaseHandler): 7 | def get(self): 8 | self.write('WORKING') 9 | 10 | -------------------------------------------------------------------------------- /r3/web/config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | DEBUG = True 5 | SECRET_KEY = 'development key' 6 | 7 | WEB_HOST 
= '0.0.0.0' 8 | WEB_PORT = 8888 9 | 10 | REDIS_HOST = 'localhost' 11 | REDIS_PORT = 7778 12 | REDIS_PASS = 'r3' 13 | -------------------------------------------------------------------------------- /r3/web/static/js/progress.js: -------------------------------------------------------------------------------- 1 | $(function() { 2 | $(".meter > span").each(function() { 3 | $(this) 4 | .data("origWidth", $(this).width()) 5 | .width(0) 6 | .animate({ 7 | width: $(this).data("origWidth") 8 | }, 1200); 9 | }); 10 | }); 11 | 12 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | prune dist 2 | prune build 3 | prune test 4 | recursive-include r3 *.py 5 | recursive-include r3 *.gif 6 | recursive-include r3 *.png 7 | recursive-include r3 *.jpg 8 | recursive-include r3 *.jpeg 9 | recursive-include r3 *.html 10 | recursive-include r3 *.htm 11 | recursive-include r3 *.js 12 | recursive-include r3 *.css 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[co] 2 | 3 | # Packages 4 | *.egg 5 | *.egg-info 6 | dist 7 | build 8 | eggs 9 | parts 10 | bin 11 | var 12 | sdist 13 | develop-eggs 14 | .installed.cfg 15 | 16 | # Installer logs 17 | pip-log.txt 18 | 19 | # Unit test / coverage reports 20 | .coverage 21 | .tox 22 | 23 | #Translations 24 | *.mo 25 | 26 | #Mr Developer 27 | .mr.developer.cfg 28 | .DS_Store 29 | *.geany 30 | -------------------------------------------------------------------------------- /test/count_words_reducer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | from collections import defaultdict 5 | 6 | class CountWordsReducer: 7 | job_type = 'count-words' 8 | 9 | def reduce(self, app, items): 10 | word_freq 
= defaultdict(int) 11 | for line in items: 12 | for word, frequency in line: 13 | word_freq[word] += frequency 14 | 15 | return word_freq 16 | -------------------------------------------------------------------------------- /test/count_words_stream.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | from os.path import abspath, dirname, join 5 | 6 | class CountWordsStream: 7 | job_type = 'count-words' 8 | group_size = 1000 9 | 10 | def process(self, app, arguments): 11 | with open(abspath(join(dirname(__file__), 'chekhov.txt'))) as f: 12 | contents = f.readlines() 13 | 14 | return [line.lower() for line in contents] 15 | 16 | 17 | -------------------------------------------------------------------------------- /r3/app/handlers/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | import tornado.web 5 | 6 | from r3.app.utils import logger 7 | 8 | class BaseHandler(tornado.web.RequestHandler): 9 | def _error(self, status, msg=None): 10 | self.set_status(status) 11 | if msg is not None: 12 | logger.error(msg) 13 | self.finish() 14 | 15 | @property 16 | def redis(self): 17 | return self.application.redis 18 | 19 | 20 | -------------------------------------------------------------------------------- /test/count_words_mapper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | from r3.worker.mapper import Mapper 5 | 6 | class CountWordsMapper(Mapper): 7 | job_type = 'count-words' 8 | 9 | def map(self, lines): 10 | #time.sleep(0.5) 11 | return list(self.split_words(lines)) 12 | 13 | def split_words(self, lines): 14 | for line in lines: 15 | for word in line.split(): 16 | yield word.strip().strip('.').strip(','), 1 17 | -------------------------------------------------------------------------------- 
/r3/app/keys.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | ALL_KEYS = 'r3::*' 5 | 6 | # MAPPER KEYS 7 | MAPPERS_KEY = 'r3::mappers' 8 | MAPPER_INPUT_KEY = 'r3::jobs::%s::input' 9 | MAPPER_OUTPUT_KEY = 'r3::jobs::%s::%s::output' 10 | MAPPER_ERROR_KEY = 'r3::jobs::%s::errors' 11 | MAPPER_WORKING_KEY = 'r3::jobs::%s::working' 12 | LAST_PING_KEY = 'r3::mappers::%s::last-ping' 13 | 14 | # JOB TYPES KEYS 15 | JOB_TYPES_KEY = 'r3::job-types' 16 | JOB_TYPES_ERRORS_KEY = 'r3::jobs::*::errors' 17 | JOB_TYPE_KEY = 'r3::job-types::%s' 18 | 19 | # STATS KEYS 20 | PROCESSED = 'r3::stats::processed' 21 | PROCESSED_SUCCESS = 'r3::stats::processed::success' 22 | PROCESSED_FAILED = 'r3::stats::processed::fail' 23 | 24 | -------------------------------------------------------------------------------- /r3/app/config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | from os.path import isabs, abspath 5 | import imp 6 | 7 | class Config: 8 | def __init__(self, path): 9 | if not isabs(path): 10 | self.path = abspath(path) 11 | else: 12 | self.path = path 13 | 14 | self.load() 15 | 16 | def load(self): 17 | with open(self.path) as config_file: 18 | name = 'configuration' 19 | code = config_file.read() 20 | module = imp.new_module(name) 21 | exec code in module.__dict__ 22 | 23 | for name, value in module.__dict__.iteritems(): 24 | setattr(self, name, value) 25 | 26 | 27 | -------------------------------------------------------------------------------- /r3/app/handlers/index.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | from r3.app.handlers import BaseHandler 5 | from r3.app.keys import MAPPERS_KEY 6 | from r3.version import __version__ 7 | 8 | class IndexHandler(BaseHandler): 9 | def get(self): 10 | 
has_reducers = len(self.application.reducers.keys()) > 0 11 | 12 | self.render( 13 | "../templates/index.html", 14 | title="", 15 | r3_version=__version__, 16 | input_streams=self.application.input_streams.keys(), 17 | has_reducers=has_reducers, 18 | mappers=self.get_mappers() 19 | ) 20 | 21 | def get_mappers(self): 22 | return self.redis.smembers(MAPPERS_KEY) 23 | 24 | 25 | -------------------------------------------------------------------------------- /r3/web/templates/show_key.html: -------------------------------------------------------------------------------- 1 | {% extends "master.html" %} 2 | 3 | {% block title %} - Overview{% endblock %} 4 | 5 | {% block css %} 6 | {{ super() }} 7 | 10 | {% endblock %} 11 | 12 | {% block body %} 13 |
{{ v|safe }}{{ value|safe }}
25 | {% endif %}
26 | | Name | 23 |Working on | 24 ||
|---|---|---|
| {{ mapper }} | 30 | {% if status %} 31 |Processing job {{ status }} | 32 | {% else %} 33 |Waiting for a new job... | 34 | {% endif %} 35 |
| No mappers registered so far... | 39 |||
| Job ID | 16 |Phase | 17 |
|---|---|
| {{ job }} | 23 |Mapping | 24 |
| Nothing happening here right now... | 28 ||
| Job ID | 37 |Phase | 38 |
|---|---|
| Nothing happening here right now... | 43 ||
| Job Type | 27 |Job ID | 28 |Date | 29 |Message | 30 |31 | |
|---|---|---|---|---|
| {{ error['job_key'] }} | 37 |{{ error['job_id'] }} | 38 |{{ error['date'] }} | 39 |{{ error['error'] }} | 40 |delete | 41 |
| No errors happened so far. Yay!!! | 45 |||||
| Job ID | 26 |Phase | 27 |
|---|---|
| {{ job }} | 33 |Mapping | 34 |
| Nothing happening here right now... | 38 ||
| Job ID | 47 |Phase | 48 |
|---|---|
| Nothing happening here right now... | 53 ||
| Name | 77 |Working on | 78 ||
|---|---|---|
| {{ mapper }} | 84 | {% if status %} 85 |Processing job {{ status['job_id'] }} | 86 | {% else %} 87 |Waiting for a new job... | 88 | {% endif %} 89 |
| No mappers registered so far... | 93 |||
| job-types | 24 |{{ g.job_types|count }} | 25 |
| redis server | 28 |{{ g.config['REDIS_HOST'] }}:{{ g.config['REDIS_PORT'] }} | 29 |
| failed jobs | 32 |{{ failed }} | 33 |
| processed | 36 |{{ processed }} | 37 |
| mappers | 40 |{{ g.mappers|count }} | 41 |
| key name | 54 |type | 55 |size | 56 |57 | |
|---|---|---|---|
| {{ key['name'] }} | 64 |{{ key['type'] }} | 65 |{{ key['size'] }} | 66 |delete | 67 |
| No keys owned by r³ | 71 ||||
| {{ key }} | 85 |{{ value }} | 86 |
Input Streams are classes that generate units-of-work that your mappers will work on.
186 |Creating them is as simple as creating a class that has a process method and a job_type attribute:
187 | 188 |After you create your input stream, just add it to a config.py file:
189 | 190 |Then pass the file path as an argument to r3-app like this:
191 | 192 |For more information check the documentation online.
Setting up mappers to run your map/reduce tasks is an integral part of r³ and is as simple as creating a class that inherits from Mapper:
200 | 201 |Running the mappers is pretty simple as well. Say you want to run four different mappers:
202 | 203 |For more information check the documentation online.
204 |Reducers are the classes that get the mapped units-of-work generated by your mappers and process them into a single coherent result.
212 |Creating them is as simple as creating a class that has a reduce method and a job_type attribute:
214 |After you create your reducer, just add it to a config.py file:
214 | 215 |Then pass the file path as an argument to r3-app like this:
216 | 217 |For more information check the documentation online.
Please be advised that the link below may not work if your input stream requires additional arguments in its URL.
236 |