├── .gitattributes ├── .gitignore ├── .pyup.yml ├── .travis.yml ├── CONTRIBUTING.rst ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.rst ├── bin ├── benchmark └── runpipe ├── dev-requirements.txt ├── docs ├── AUTHORS.rst ├── CHANGES.rst ├── COOKBOOK.rst ├── FAQ.rst ├── INSTALLATION.rst └── TODO.rst ├── examples ├── __init__.py ├── demo.py ├── gigs.py ├── kazeeki.py ├── simple1.py ├── simple2.py ├── split.py ├── usage.ipynb ├── usage.py └── wired.py ├── helpers ├── check-stage ├── clean ├── pippy ├── srcdist └── wheel ├── manage.py ├── optional-requirements.txt ├── pyproject.toml ├── requirements.txt ├── riko ├── __init__.py ├── autorss.py ├── bado │ ├── __init__.py │ ├── io.py │ ├── itertools.py │ ├── microdom.py │ ├── mock.py │ ├── requests.py │ ├── sux.py │ └── util.py ├── cast.py ├── collections.py ├── currencies.py ├── data │ ├── Politik.xml │ ├── TheEdTechie.xml │ ├── Topthemen.xml │ ├── autoblog.xml │ ├── bbc.html │ ├── bbci.co.uk.xml │ ├── caltrain.html │ ├── capnorth.xml │ ├── cnn.html │ ├── countries.csv │ ├── currencies.csv │ ├── currencies.json │ ├── delicious.xml │ ├── elance.json │ ├── election_results.json │ ├── feed.xml │ ├── fourtitude.xml │ ├── freelancer.json │ ├── gawker.xml │ ├── gigs.json │ ├── greenhughes.xml │ ├── guru.json │ ├── health.xml │ ├── lorem.txt │ ├── odesk.json │ ├── ouseful.xml │ ├── ouseful_feedburner.xml │ ├── places.xml │ ├── podcast.xml │ ├── psychemedia_delicious.xml │ ├── psychemedia_slideshare.xml │ ├── quote.json │ ├── schools.xml │ ├── sciencedaily.html │ ├── scotland.xml │ ├── spreadsheet.csv │ ├── status.csv │ ├── topstories.xml │ ├── users.jyu.fi.html │ ├── yodel.xml │ └── yql.xml ├── dates.py ├── dotdict.py ├── locations.py ├── modules │ ├── __init__.py │ ├── count.py │ ├── csv.py │ ├── currencyformat.py │ ├── dateformat.py │ ├── exchangerate.py │ ├── feedautodiscovery.py │ ├── fetch.py │ ├── fetchdata.py │ ├── fetchpage.py │ ├── fetchsitefeed.py │ ├── fetchtext.py │ ├── filter.py │ ├── geolocate.py │ ├── hash.py 
│ ├── input.py │ ├── itembuilder.py │ ├── join.py │ ├── refind.py │ ├── regex.py │ ├── rename.py │ ├── reverse.py │ ├── rssitembuilder.py │ ├── simplemath.py │ ├── slugify.py │ ├── sort.py │ ├── split.py │ ├── strconcat.py │ ├── strfind.py │ ├── strreplace.py │ ├── strtransform.py │ ├── subelement.py │ ├── substr.py │ ├── sum.py │ ├── tail.py │ ├── timeout.py │ ├── tokenizer.py │ ├── truncate.py │ ├── typecast.py │ ├── udf.py │ ├── union.py │ ├── uniq.py │ ├── urlbuilder.py │ ├── urlparse.py │ ├── xpathfetchpage.py │ └── yql.py ├── parsers.py └── utils.py ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── pylintrc ├── test.py └── test_examples.py └── tox.ini /.gitattributes: -------------------------------------------------------------------------------- 1 | riko/data/* linguist-vendored 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | *.py[cod] 3 | *.DS_Store 4 | *.ipynb 5 | *checkpoint.ipynb 6 | .cookiecutter 7 | .ipynb_checkpoints/* 8 | .idea/ 9 | example*.log 10 | examples/.ipynb_checkpoints/* 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Packages 16 | *.egg* 17 | *.egg-info 18 | .eggs 19 | .installed.cfg 20 | .installed.cfg 21 | *-0.*/* 22 | dist 23 | build 24 | eggs 25 | parts 26 | var 27 | sdist 28 | develop-eggs 29 | lib 30 | lib64 31 | venv 32 | 33 | # Installer logs 34 | pip-log.txt 35 | 36 | # Unit test / coverage reports 37 | *.wpu 38 | *.rope 39 | *.noseids 40 | *.ropeproject 41 | .coverage 42 | .tox 43 | .noseids 44 | .cache/* 45 | .scripttest 46 | coverage.xml 47 | cover/* 48 | htmlcov/* 49 | nosetests.xml 50 | 51 | # Translations 52 | *.mo 53 | 54 | # Mr Developer 55 | .mr.developer.cfg 56 | .project 57 | .pydevproject 58 | 59 | # Complexity 60 | output/*.html 61 | output/*/index.html 62 | 63 | # Sphinx 64 | docs/_build 65 | build/* 66 | 67 | # Misc 68 | *.pstats 69 | .vscode 70 | callgraph.svg 71 
| ttyrecord 72 | 73 | -------------------------------------------------------------------------------- /.pyup.yml: -------------------------------------------------------------------------------- 1 | # autogenerated pyup.io config file 2 | # see https://pyup.io/docs/configuration/ for all available options 3 | 4 | update: security 5 | pin: False 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: python 3 | python: 4 | - "3.7" 5 | - "3.8" 6 | - "3.9" 7 | - pypy3.7-7.3.2 8 | 9 | cache: 10 | directories: 11 | - ~/.cache/pip 12 | 13 | env: 14 | matrix: 15 | - OPTIONAL=true 16 | - OPTIONAL=false 17 | global: 18 | - PIP_WHEEL_DIR=$HOME/.cache/pip/wheels 19 | - PIP_FIND_LINKS=file://$HOME/.cache/pip/wheels 20 | - PYTHONHASHSEED=94967295 21 | 22 | matrix: 23 | fast_finish: true 24 | allow_failures: 25 | - python: pypy3.7-7.3.2 26 | exclude: 27 | - python: pypy3.7-7.3.2 28 | env: OPTIONAL=true 29 | 30 | notifications: 31 | email: 32 | on_success: always 33 | 34 | branches: 35 | except: 36 | - /^v[0-9]/ 37 | 38 | before_install: 39 | - pip install -U pip 40 | - pip install wheel 41 | 42 | install: 43 | - pip install -r dev-requirements.txt 44 | - pip install -r requirements.txt 45 | - | 46 | if [[ "$OPTIONAL" == "true" ]]; then 47 | pip wheel -r optional-requirements.txt 48 | pip install -r optional-requirements.txt 49 | fi 50 | 51 | script: 52 | - manage lint && manage test 53 | 54 | after_success: 55 | - if [[ "$TRAVIS_PULL_REQUEST" == "true" ]]; then exit 0; fi 56 | - if [[ "$TRAVIS_BRANCH" != "master" ]]; then exit 0; fi 57 | - if [[ "$TRAVIS_PYTHON_VERSION" != "3.9" ]]; then exit 0; fi 58 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Contributing 3 | 
============ 4 | 5 | Contributions are welcome, and they are greatly appreciated! Every 6 | little bit helps, and credit will always be given. 7 | 8 | When contributing, please mimic the coding style/conventions used in this repo. 9 | If you add new classes or functions, please add the appropriate doc blocks with 10 | examples. Also, make sure the python linter and tests pass. 11 | 12 | Ready to contribute? Here's how. 13 | 14 | Types of Contributions 15 | ---------------------- 16 | 17 | Feedback & Bug Reports 18 | ~~~~~~~~~~~~~~~~~~~~~~ 19 | 20 | The best way to send feedback or report a bug is to file an issue at 21 | https://github.com/nerevu/riko/issues. 22 | 23 | If you are reporting a bug, please include: 24 | 25 | * Your operating system name and version. 26 | * Any details about your local setup that might be helpful in troubleshooting. 27 | * Detailed steps to reproduce the bug. 28 | 29 | Bug Fixes 30 | ~~~~~~~~~ 31 | 32 | Look through the GitHub `issues`_ for anything tagged with ``bug`` and hack away. 33 | 34 | Feature Implementation 35 | ~~~~~~~~~~~~~~~~~~~~~~ 36 | 37 | Look through the GitHub `issues`_ for anything tagged with ``feature`` and hack away. 38 | 39 | If you are *proposing* a feature: 40 | 41 | * Explain in detail how it would work. 42 | * To make it easier to implement, Keep the scope as narrow as possible. 43 | * Remember that this is a volunteer-driven project, and that contributions 44 | are welcome :) 45 | 46 | Documentation 47 | ~~~~~~~~~~~~~ 48 | 49 | riko could always use more documentation, whether as part of the 50 | official docs, in docstrings, or even on the web in blog posts, articles, and such. 51 | Feel free to contribute any type of documentation. 52 | 53 | Get Started! 54 | ------------ 55 | 56 | Ready to contribute? Here's how to set up ``riko`` for local development. 57 | 58 | 1. Fork the ``riko`` repo on GitHub and clone 59 | 60 | .. 
code-block:: bash 61 | 62 | git clone git@github.com:/riko.git 63 | cd riko 64 | 65 | 2. Setup a new `virtualenv`_ with ``virtualenvwrapper`` 66 | 67 | .. code-block:: bash 68 | 69 | mkvirtualenv --no-site-packages riko 70 | 71 | Or, if you only have ``virtualenv`` installed 72 | 73 | .. code-block:: bash 74 | 75 | virtualenv --no-site-packages ~/.venvs/riko 76 | source ~/.venvs/riko/bin/activate 77 | 78 | 3. Install required modules 79 | 80 | Python3 81 | 82 | .. code-block:: bash 83 | 84 | pip install -r dev-requirements.txt 85 | pip install -r optional-requirements.txt 86 | pip install -r requirements.txt 87 | 88 | Python2 89 | 90 | .. code-block:: bash 91 | 92 | pip install -r dev-requirements.txt 93 | pip install -r optional-requirements.txt 94 | pip install -r py2-requirements.txt 95 | 96 | 4. Run setup develop script 97 | 98 | .. code-block:: bash 99 | 100 | python setup.py develop 101 | 102 | 5. Create a branch for local development 103 | 104 | .. code-block:: bash 105 | 106 | git checkout -b name-of-your-bugfix-or-feature 107 | 108 | 6. Make your changes and run linter and tests 109 | 110 | .. code-block:: bash 111 | 112 | manage lint 113 | manage test 114 | 115 | # or to run the full integration tests 116 | tox 117 | 118 | 5. Commit your changes and push your branch to GitHub 119 | 120 | .. code-block:: bash 121 | 122 | git add . 123 | git commit -m "Your detailed description of your changes." 124 | git push origin name-of-your-bugfix-or-feature 125 | 126 | 6. Submit a pull request on the riko `repo`_. 127 | 128 | Pull Request Guidelines 129 | ----------------------- 130 | 131 | Before you submit a pull request, check that it meets these guidelines: 132 | 133 | 1. The pull request includes tests. 134 | 2. If the pull request adds functionality, the docs should be updated: Put 135 | your new functionality into a function with a docstring, and add the 136 | feature to the list in README.rst. 137 | 138 | .. 
_issues: https://github.com/nerevu/riko/issues 139 | .. _repo: https://github.com/nerevu/riko 140 | .. _virtualenv: https://virtualenv.pypa.io/en/latest/index.html 141 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016, Reuben Cummings 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include riko/data * 2 | recursive-include tests * 3 | recursive-include helpers * 4 | recursive-include docs * 5 | recursive-include examples * 6 | include LICENSE 7 | include *.rst 8 | include *requirements.txt 9 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: help clean check-stage pipme require lint test tox register upload release sdist wheel 2 | 3 | help: 4 | @echo "clean - remove Python file and build artifacts" 5 | @echo "check-stage - check staged changes for lint errors" 6 | @echo "pipme - install requirements.txt" 7 | @echo "require - create requirements.txt" 8 | @echo "lint - check style with flake8" 9 | @echo "test - run nose and script tests" 10 | @echo "release - package and upload a release" 11 | @echo "sdist - create a source distribution package" 12 | @echo "wheel - create a wheel package" 13 | @echo "upload - upload dist files" 14 | @echo "register - register package with PyPI" 15 | @echo "tox - run tests on every Python version with tox" 16 | 17 | clean: 18 | helpers/clean 19 | 20 | check-stage: 21 | helpers/check-stage 22 | 23 | pipme: 24 | pip install -r requirements.txt 25 | 26 | require: 27 | pip freeze -l | grep -vxFf dev-requirements.txt > requirements.txt 28 | 29 | lint: 30 | flake8 riko tests 31 | 32 | test: 33 | nosetests -xv 34 | python tests/test.py 35 | 36 | release: clean sdist wheel upload 37 | 38 | register: 39 | python setup.py register 40 | 41 | sdist: 42 | clean 43 | helpers/srcdist 44 | 45 | wheel: 46 | clean 47 | helpers/wheel 48 | 49 | upload: 50 | twine upload dist/* 51 | 52 | tox: 53 | tox 54 | -------------------------------------------------------------------------------- /bin/benchmark: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # vim: sw=4:ts=4:expandtab 4 | 5 | from __future__ import ( 6 | absolute_import, division, print_function, unicode_literals) 7 | 8 | import sys 9 | 10 | from os import path as p 11 | from functools import partial 12 | from multiprocessing.dummy import Pool as ThreadPool 13 | from multiprocessing import Pool 14 | from time import time, sleep 15 | from itertools import chain 16 | 17 | from builtins import * # noqa # pylint: disable=unused-import 18 | 19 | sys.path.append('../riko') 20 | 21 | from riko import get_path 22 | from riko.bado import coroutine, return_value, react 23 | from riko.bado.util import async_sleep 24 | from riko.bado.itertools import async_imap 25 | from riko.modules.fetch import pipe, async_pipe 26 | from riko.collections import ( 27 | SyncPipe, SyncCollection, AsyncPipe, AsyncCollection, get_chunksize, 28 | get_worker_cnt) 29 | 30 | NUMBER = 1 31 | LOOPS = 1 32 | DELAY = 0.1 33 | 34 | parent = p.join(p.abspath(p.dirname(p.dirname(__file__))), 'data') 35 | files = [ 36 | 'ouseful.xml', 37 | 'feed.xml', 38 | 'delicious.xml', 39 | 'psychemedia_delicious.xml', 40 | 'ouseful_feedburner.xml', 41 | 'TheEdTechie.xml', 42 | 'yodel.xml', 43 | 'gawker.xml', 44 | 'health.xml', 45 | 'topstories.xml', 46 | 'autoblog.xml', 47 | 'fourtitude.xml', 48 | 'greenhughes.xml', 49 | 'psychemedia_slideshare.xml'] 50 | 51 | urls = [get_path(f) for f in files] 52 | confs = [{'url': url, 'sleep': DELAY} for url in urls] 53 | sources = [{'url': url} for url in urls] 54 | length = len(files) 55 | iterable = [DELAY for x in files] 56 | 57 | 58 | def baseline_sync(): 59 | return list(map(sleep, iterable)) 60 | 61 | 62 | def baseline_threads(): 63 | workers = get_worker_cnt(length) 64 | chunksize = get_chunksize(length, workers) 65 | pool = ThreadPool(workers) 66 | return list(pool.imap_unordered(sleep, iterable, chunksize=chunksize)) 67 | 
68 | 69 | def baseline_procs(): 70 | workers = get_worker_cnt(length, False) 71 | chunksize = get_chunksize(length, workers) 72 | pool = Pool(workers) 73 | return list(pool.imap_unordered(sleep, iterable, chunksize=chunksize)) 74 | 75 | 76 | def sync_pipeline(): 77 | pipes = (pipe(conf=conf) for conf in confs) 78 | return list(chain.from_iterable(pipes)) 79 | 80 | 81 | def sync_pipe(): 82 | streams = (SyncPipe('fetch', conf=conf).list for conf in confs) 83 | return list(chain.from_iterable(streams)) 84 | 85 | 86 | def sync_collection(): 87 | return SyncCollection(sources, sleep=DELAY).list 88 | 89 | 90 | def par_sync_collection(): 91 | return SyncCollection(sources, parallel=True, sleep=DELAY).list 92 | 93 | 94 | def baseline_async(): 95 | return async_imap(async_sleep, iterable) 96 | 97 | 98 | def async_pipeline(): 99 | d = async_imap(lambda conf: async_pipe(conf=conf), confs) 100 | d.addCallbacks(list, print) 101 | 102 | def async_pipe(): 103 | asyncCallable = lambda conf: AsyncPipe('fetch', conf=conf).list 104 | d = async_imap(asyncCallable, confs) 105 | d.addCallbacks(list, print) 106 | 107 | 108 | def async_collection(): 109 | return AsyncCollection(sources, sleep=DELAY).list 110 | 111 | 112 | def parse_results(results): 113 | switch = {0: 'secs', 3: 'msecs', 6: 'usecs'} 114 | best = min(results) 115 | 116 | for places in [0, 3, 6]: 117 | factor = pow(10, places) 118 | if 1 / best // factor == 0: 119 | break 120 | 121 | return round(best * factor, 2), switch[places] 122 | 123 | 124 | def print_time(test, max_chars, run_time, units): 125 | padded = test.zfill(max_chars).replace('0', ' ') 126 | msg = '%s - %i repetitions/loop, best of %i loops: %s %s' 127 | print(msg % (padded, NUMBER, LOOPS, run_time, units)) 128 | 129 | 130 | @coroutine 131 | def run_async(reactor, tests, max_chars): 132 | for test in tests: 133 | results = [] 134 | 135 | for i in range(LOOPS): 136 | loop = 0 137 | 138 | for j in range(NUMBER): 139 | start = time() 140 | yield test() 141 | 
loop += time() - start 142 | 143 | results.append(loop) 144 | 145 | run_time, units = parse_results(results) 146 | print_time(test.__name__, max_chars, run_time, units) 147 | 148 | return_value(None) 149 | 150 | if __name__ == '__main__': 151 | from timeit import repeat 152 | 153 | run = partial(repeat, repeat=LOOPS, number=NUMBER) 154 | sync_tests = [ 155 | 'baseline_sync', 'baseline_threads', 'baseline_procs', 'sync_pipeline', 156 | 'sync_pipe', 'sync_collection', 'par_sync_collection'] 157 | 158 | async_tests = [baseline_async, async_pipeline, async_pipe, async_collection] 159 | combined_tests = sync_tests + [f.__name__ for f in async_tests] 160 | max_chars = max(list(map(len, combined_tests))) 161 | 162 | for test in sync_tests: 163 | results = run('%s()' % test, setup='from __main__ import %s' % test) 164 | run_time, units = parse_results(results) 165 | print_time(test, max_chars, run_time, units) 166 | 167 | react(run_async, [async_tests, max_chars]) 168 | -------------------------------------------------------------------------------- /bin/runpipe: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import ( 5 | absolute_import, division, print_function, unicode_literals) 6 | 7 | import requests 8 | import sys 9 | sys.path.append('../riko') 10 | 11 | from os import path as p 12 | from importlib import import_module 13 | 14 | try: 15 | # python 3.3+ 16 | from importlib.machinery import SourceFileLoader 17 | except ImportError: 18 | try: 19 | # python 3.4+ 20 | from importlib.util import spec_from_file_location, module_from_spec 21 | except ImportError: 22 | # python 2.7- 23 | from imp import load_source as load_file 24 | io_error = IOError 25 | else: 26 | io_error = FileNotFoundError 27 | 28 | def load_file(name, src): 29 | location = 'examples/%s.py' % src 30 | spec = spec_from_file_location(name, location) 31 | module = module_from_spec(spec) 32 | 
spec.loader.exec_module(module) 33 | return module 34 | else: 35 | io_error = FileNotFoundError 36 | load_file = lambda name, src: SourceFileLoader(name, src).load_module() 37 | 38 | from argparse import RawTextHelpFormatter, ArgumentParser 39 | from riko.bado import react 40 | 41 | parser = ArgumentParser( 42 | description='description: Runs a riko pipe', prog='runpipe', 43 | usage='%(prog)s [pipeid]', formatter_class=RawTextHelpFormatter) 44 | 45 | parser.add_argument( 46 | dest='pipeid', nargs='?', default=sys.stdin, 47 | help='The pipe to run (default: reads from stdin).') 48 | 49 | parser.add_argument( 50 | '-a', '--async', dest='isasync', action='store_true', default=False, 51 | help="Load async pipe.\n\n") 52 | 53 | parser.add_argument( 54 | '-t', '--test', action='store_true', default=False, 55 | help="Run in test mode (uses default inputs).\n\n") 56 | 57 | args = parser.parse_args() 58 | 59 | 60 | def file2name(path): 61 | return p.splitext(p.basename(path))[0] 62 | 63 | 64 | def run(): 65 | """CLI runner""" 66 | try: 67 | pipeid = args.pipeid.read() 68 | except AttributeError: 69 | pipeid = args.pipeid 70 | 71 | try: 72 | name = file2name('%s.py' % pipeid) 73 | module = load_file(name, pipeid) 74 | except io_error: 75 | try: 76 | module = import_module('examples.%s' % pipeid) 77 | except ImportError: 78 | exit('Pipe examples.%s not found!' 
% pipeid) 79 | 80 | if args.isasync: 81 | pipeline = getattr(module, 'async_pipe') 82 | react(pipeline, [args.test]) 83 | else: 84 | pipeline = getattr(module, 'pipe') 85 | pipeline(test=args.test) 86 | 87 | if __name__ == "__main__": 88 | run() 89 | -------------------------------------------------------------------------------- /dev-requirements.txt: -------------------------------------------------------------------------------- 1 | pip>20.0 2 | wheel>=0.29.0 3 | black>=19.3b0,<22.0 4 | coverage>=4.3.4,<6.0.0 5 | flake8>=3.7.9,<5.0.0 6 | flake8-black>=0.1.1,<0.3.0 7 | nose>=1.3.7,<2.0.0 8 | manage.py>=0.2.10,<0.3.0 9 | pkutils>=3.0.0,<4.0.0 10 | pylint>=2.5.0,<3.0.0 11 | responses>=0.9.0,<0.15.0 12 | scripttest>=1.3,<2.0 13 | setuptools>=42.0.2 14 | tox>=3.14.3,<4.0.0 15 | twine>=3.2.0,<4.0.0 16 | -------------------------------------------------------------------------------- /docs/AUTHORS.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | Credits 3 | ======= 4 | 5 | Development Lead 6 | ---------------- 7 | 8 | * Reuben Cummings 9 | 10 | Contributors 11 | ------------ 12 | 13 | None yet. Why not be the first? 14 | -------------------------------------------------------------------------------- /docs/CHANGES.rst: -------------------------------------------------------------------------------- 1 | Changelog 2 | ========= 3 | 4 | %%version%% (unreleased) 5 | ------------------------ 6 | 7 | Bugfixes 8 | ~~~~~~~~ 9 | 10 | - Store downloaded packages in wheel dir. [Reuben Cummings] 11 | 12 | - Fix prefix generation. [Reuben Cummings] 13 | 14 | v0.35.1 (2016-07-22) 15 | -------------------- 16 | 17 | Bugfixes 18 | ~~~~~~~~ 19 | 20 | - Fix makefile lint command. [Reuben Cummings] 21 | 22 | - Update pygogo requirement (fixes #2) [Reuben Cummings] 23 | 24 | v0.35.0 (2016-07-19) 25 | -------------------- 26 | 27 | New 28 | ~~~ 29 | 30 | - Limit the number of unique items tracked. 
[Reuben Cummings] 31 | 32 | - Add grouping ability to count pipe. [Reuben Cummings] 33 | 34 | Bugfixes 35 | ~~~~~~~~ 36 | 37 | - Fix processor metadata. [Reuben Cummings] 38 | 39 | v0.34.0 (2016-07-19) 40 | -------------------- 41 | 42 | New 43 | ~~~ 44 | 45 | - Add list element searching to microdom. [Reuben Cummings] 46 | 47 | - Add more operations to filter pipes. [Reuben Cummings] 48 | 49 | Changes 50 | ~~~~~~~ 51 | 52 | - Merge async_pmap and async_imap. [Reuben Cummings] 53 | 54 | - Change deferToProcess name and arguments. [Reuben Cummings] 55 | 56 | - Rename modules/functions, and update docs. [Reuben Cummings] 57 | 58 | Bugfixes 59 | ~~~~~~~~ 60 | 61 | - Force getElementsByTagName to return child. [Reuben Cummings] 62 | 63 | - Only use FakeReactor when actually needed. [Reuben Cummings] 64 | 65 | - Fix async html parsing. [Reuben Cummings] 66 | 67 | - Prevent IndexError. [Reuben Cummings] 68 | 69 | - Fix async opening of http files. [Reuben Cummings] 70 | 71 | - Be lenient with html parsing. [Reuben Cummings] 72 | 73 | - Fix empty xpath and start value bugs. [Reuben Cummings] 74 | 75 | v0.33.0 (2016-07-01) 76 | -------------------- 77 | 78 | Changes 79 | ~~~~~~~ 80 | 81 | - Major refactor for py3 support: [Reuben Cummings] 82 | 83 | - fix py3 and open file errors 84 | - port missing twisted modules 85 | - refactor rss parsing 86 | - and streaming json support 87 | - rename request function 88 | - make benchmarks.py a script and add to tests 89 | 90 | Bugfixes 91 | ~~~~~~~~ 92 | 93 | - Fix pypy test errors. [Reuben Cummings] 94 | 95 | v0.32.0 (2016-06-16) 96 | -------------------- 97 | 98 | Changes 99 | ~~~~~~~ 100 | 101 | - Refactor to remove Twisted dependency. [Reuben Cummings] 102 | 103 | v0.31.0 (2016-06-16) 104 | -------------------- 105 | 106 | New 107 | ~~~ 108 | 109 | - Add parallel testing. 
[Reuben Cummings] 110 | 111 | v0.30.2 (2016-06-16) 112 | -------------------- 113 | 114 | Bugfixes 115 | ~~~~~~~~ 116 | 117 | - Add missing optional dependency. [Reuben Cummings] 118 | 119 | v0.30.1 (2016-06-16) 120 | -------------------- 121 | 122 | Bugfixes 123 | ~~~~~~~~ 124 | 125 | - Fix failed test runner. [Reuben Cummings] 126 | 127 | - Fix lxml dependency errors. [Reuben Cummings] 128 | 129 | v0.30.0 (2016-06-15) 130 | -------------------- 131 | 132 | New 133 | ~~~ 134 | 135 | - Try loading workflow from curdir first. [Reuben Cummings] 136 | 137 | Bugfixes 138 | ~~~~~~~~ 139 | 140 | - Fix remaining pypy errors. [Reuben Cummings] 141 | 142 | - Fix “newdict instance” error for pypy. [Reuben Cummings] 143 | 144 | - Add detagging to `fetchpage` async parser. [Reuben Cummings] 145 | 146 | v0.28.0 (2016-03-25) 147 | -------------------- 148 | 149 | New 150 | ~~~ 151 | 152 | - Add option to specify value if no regex match found. [Reuben Cummings] 153 | 154 | Changes 155 | ~~~~~~~ 156 | 157 | - Make default exchange rate field ‘content’ [Reuben Cummings] 158 | 159 | - Split now returns tier of feeds. [Reuben Cummings] 160 | 161 | Bugfixes 162 | ~~~~~~~~ 163 | 164 | - Fix test mode for input pipe. [Reuben Cummings] 165 | 166 | - Fix terminal parsing. [Reuben Cummings] 167 | 168 | - Fix input pipe if no inputs given. [Reuben Cummings] 169 | 170 | - Fix sleep config. [Reuben Cummings] 171 | 172 | - Fix json bool parsing. [Reuben Cummings] 173 | 174 | 175 | -------------------------------------------------------------------------------- /docs/INSTALLATION.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ------------ 3 | 4 | (You are using a `virtualenv`_, right?) 5 | 6 | At the command line, install riko using either ``pip`` (recommended) 7 | 8 | .. code-block:: bash 9 | 10 | pip install riko 11 | 12 | or ``easy_install`` 13 | 14 | .. 
code-block:: bash 15 | 16 | easy_install riko 17 | 18 | Detailed installation instructions 19 | ---------------------------------- 20 | 21 | If you have `virtualenvwrapper`_ installed, at the command line type: 22 | 23 | .. code-block:: bash 24 | 25 | mkvirtualenv riko 26 | pip install riko 27 | 28 | Or, if you only have ``virtualenv`` installed: 29 | 30 | .. code-block:: bash 31 | 32 | virtualenv ~/.venvs/riko 33 | source ~/.venvs/riko/bin/activate 34 | pip install riko 35 | 36 | Otherwise, you can install globally:: 37 | 38 | pip install riko 39 | 40 | .. _virtualenv: https://virtualenv.pypa.io/en/latest/index.html 41 | .. _virtualenvwrapper: https://virtualenvwrapper.readthedocs.org/en/latest/ 42 | -------------------------------------------------------------------------------- /docs/TODO.rst: -------------------------------------------------------------------------------- 1 | ==== 2 | TODO 3 | ==== 4 | 5 | - Upgrade to Python 3 6 | - Add more protocols (FTP, SSH, IMAP, etc.) 7 | - Add HDFS support 8 | 9 | .. todo:: vim: set filetype=rst: 10 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nerevu/riko/4d27102b605b8b4050ba566d5e0895d8d5f8b09a/examples/__init__.py -------------------------------------------------------------------------------- /examples/demo.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # vim: sw=4:ts=4:expandtab 3 | 4 | """ 5 | riko demo 6 | ~~~~~~~~~ 7 | 8 | Word Count 9 | 10 | >>> from riko import get_path 11 | >>> from riko.collections import SyncPipe 12 | >>> 13 | >>> url = get_path('users.jyu.fi.html') 14 | >>> fetch_conf = { 15 | ... 
'url': url, 'start': '', 'end': '', 'detag': True} 16 | >>> replace_conf = {'rule': {'find': '\\n', 'replace': ' '}} 17 | >>> 18 | >>> counts = (SyncPipe('fetchpage', conf=fetch_conf) 19 | ... .strreplace(conf=replace_conf, assign='content') 20 | ... .tokenizer(conf={'delimiter': ' '}, emit=True) 21 | ... .count() 22 | ... .output) 23 | >>> 24 | >>> next(counts) == {'count': 70} 25 | True 26 | 27 | Fetching feeds 28 | 29 | >>> from riko.modules import fetch 30 | >>> 31 | >>> url = get_path('gawker.xml') 32 | >>> intersection = [ 33 | ... 'author', 'author.name', 'author.uri', 'dc:creator', 'id', 'link', 34 | ... 'pubDate', 'summary', 'title', 'y:id', 'y:published', 'y:title'] 35 | >>> feed = fetch.pipe(conf={'url': url}) 36 | >>> item = next(feed) 37 | >>> set(item).issuperset(intersection) 38 | True 39 | >>> item['title'][:24] == 'This Is What A Celebrity' 40 | True 41 | >>> item['link'][:23] == 'http://feeds.gawker.com' 42 | True 43 | """ 44 | from riko import get_path 45 | from riko.bado import coroutine 46 | from riko.collections import SyncPipe, AsyncPipe 47 | 48 | replace_conf = {"rule": {"find": "\n", "replace": " "}} 49 | health = get_path("health.xml") 50 | caltrain = get_path("caltrain.html") 51 | start = '' 52 | fetch_conf = {"url": caltrain, "start": start, "end": "", "detag": True} 53 | 54 | 55 | def pipe(test=False): 56 | s1 = SyncPipe("fetch", test=test, conf={"url": health}).output 57 | s2 = ( 58 | SyncPipe("fetchpage", test=test, conf=fetch_conf) 59 | .strreplace(conf=replace_conf, assign="content") 60 | .tokenizer(conf={"delimiter": " "}, emit=True) 61 | .count() 62 | .output 63 | ) 64 | 65 | print(next(s1)["title"], next(s2)["count"]) 66 | 67 | 68 | @coroutine 69 | def async_pipe(reactor, test=False): 70 | s1 = yield AsyncPipe("fetch", test=test, conf={"url": health}).output 71 | s2 = yield ( 72 | AsyncPipe("fetchpage", test=test, conf=fetch_conf) 73 | .strreplace(conf=replace_conf, assign="content") 74 | .tokenizer(conf={"delimiter": " "}, 
emit=True) 75 | .count() 76 | .output 77 | ) 78 | 79 | print(next(s1)["title"], next(s2)["count"]) 80 | -------------------------------------------------------------------------------- /examples/gigs.py: -------------------------------------------------------------------------------- 1 | from pprint import pprint 2 | from riko import get_path 3 | from riko.bado import coroutine 4 | from riko.collections import SyncPipe, AsyncPipe 5 | 6 | p1_conf = {"url": get_path("gigs.json"), "path": "value.items"} 7 | p2_conf = {"uniq_key": "link"} 8 | p3_conf = { 9 | "combine": "or", 10 | "mode": "block", 11 | "rule": [{"field": "title", "value": "php", "op": "contains"}], 12 | } 13 | 14 | p4_conf = {"rule": [{"sort_key": "pubDate", "sort_dir": "desc"}]} 15 | 16 | 17 | def pipe(test=False): 18 | stream = ( 19 | SyncPipe("fetchdata", conf=p1_conf, test=test) 20 | .uniq(conf=p2_conf) 21 | .filter(conf=p3_conf) 22 | .sort(conf=p4_conf) 23 | .list 24 | ) 25 | 26 | for i in stream: 27 | pprint(i) 28 | 29 | return stream 30 | 31 | 32 | @coroutine 33 | def async_pipe(reactor, test=False): 34 | stream = yield ( 35 | AsyncPipe("fetchdata", conf=p1_conf, test=test) 36 | .uniq(conf=p2_conf) 37 | .filter(conf=p3_conf) 38 | .sort(conf=p4_conf) 39 | .output 40 | ) 41 | 42 | for i in stream: 43 | pprint(i) 44 | -------------------------------------------------------------------------------- /examples/simple1.py: -------------------------------------------------------------------------------- 1 | from pprint import pprint 2 | from riko.bado import coroutine 3 | from riko.collections import SyncPipe, AsyncPipe 4 | 5 | p1_conf = { 6 | "attrs": [{"value": "http://www.caltrain.com/Fares/farechart.html", "key": "url"}] 7 | } 8 | 9 | p2_conf = {"rule": {"field": "url", "match": {"subkey": "url"}, "replace": "farechart"}} 10 | 11 | 12 | def pipe(test=False): 13 | stream = SyncPipe("itembuilder", conf=p1_conf, test=test).regex(conf=p2_conf).list 14 | 15 | for i in stream: 16 | pprint(str(i["url"])) 17 
| 18 | return stream 19 | 20 | 21 | @coroutine 22 | def async_pipe(reactor, test=False): 23 | stream = yield ( 24 | AsyncPipe("itembuilder", conf=p1_conf, test=test).regex(conf=p2_conf).list 25 | ) 26 | 27 | for i in stream: 28 | pprint(str(i["url"])) 29 | -------------------------------------------------------------------------------- /examples/simple2.py: -------------------------------------------------------------------------------- 1 | from pprint import pprint 2 | from riko.bado import coroutine 3 | from riko.collections import SyncPipe, AsyncPipe 4 | 5 | p232_conf = { 6 | "attrs": [ 7 | {"value": "www.google.com", "key": "link"}, 8 | {"value": "google", "key": "title"}, 9 | {"value": "empty", "key": "author"}, 10 | ] 11 | } 12 | 13 | p421_conf = {"rule": [{"find": "empty", "param": "first", "replace": "ABC"}]} 14 | 15 | 16 | def pipe(test=False): 17 | stream = ( 18 | SyncPipe("itembuilder", conf=p232_conf, test=test) 19 | .strreplace(conf=p421_conf, field="author", assign="author") 20 | .list 21 | ) 22 | 23 | for i in stream: 24 | pprint(i) 25 | 26 | return stream 27 | 28 | 29 | @coroutine 30 | def async_pipe(reactor, test=False): 31 | stream = yield ( 32 | AsyncPipe("itembuilder", conf=p232_conf, test=test) 33 | .strreplace(conf=p421_conf, field="author", assign="author") 34 | .list 35 | ) 36 | 37 | for i in stream: 38 | pprint(i) 39 | -------------------------------------------------------------------------------- /examples/split.py: -------------------------------------------------------------------------------- 1 | from pprint import pprint 2 | from riko.bado import coroutine 3 | from riko.collections import SyncPipe, AsyncPipe 4 | 5 | p385_conf = {"type": "date"} 6 | p385_in = {"content": "12/2/2014"} 7 | p405_conf = {"format": "%B %d, %Y"} 8 | p393_conf = { 9 | "attrs": [ 10 | {"value": {"terminal": "date", "path": "dateformat"}, "key": "date"}, 11 | {"value": {"terminal": "year", "path": "year"}, "key": "year"}, 12 | ] 13 | } 14 | 15 | p385_kwargs = 
{"conf": p385_conf, "inputs": p385_in} 16 | 17 | 18 | def pipe(test=False): 19 | s1, s2 = ( 20 | SyncPipe("input", test=test, **p385_kwargs) 21 | .dateformat(conf=p405_conf) 22 | .split() 23 | .output 24 | ) 25 | 26 | p393_kwargs = {"conf": p393_conf, "date": s1, "year": s2, "test": test} 27 | stream = SyncPipe("itembuilder", **p393_kwargs).list 28 | 29 | for i in stream: 30 | pprint(i) 31 | 32 | return stream 33 | 34 | 35 | @coroutine 36 | def async_pipe(reactor, test=False): 37 | s1, s2 = yield ( 38 | AsyncPipe("input", test=test, **p385_kwargs) 39 | .dateformat(conf=p405_conf) 40 | .split() 41 | .output 42 | ) 43 | 44 | p393_kwargs = {"conf": p393_conf, "date": s1, "year": s2, "test": test} 45 | stream = yield AsyncPipe("itembuilder", **p393_kwargs).list 46 | 47 | for i in stream: 48 | pprint(i) 49 | -------------------------------------------------------------------------------- /examples/wired.py: -------------------------------------------------------------------------------- 1 | from pprint import pprint 2 | from riko.bado import coroutine 3 | from riko.collections import SyncPipe, AsyncPipe 4 | 5 | p120_conf = {"type": "text"} 6 | p120_inputs = {"format": "%B %d, %Y"} 7 | p112_conf = {"type": "date", "default": "5/4/82", "prompt": "enter a date"} 8 | p151_conf = {"format": {"terminal": "format", "path": "format"}} 9 | p100_conf = { 10 | "attrs": {"value": {"terminal": "value", "path": "dateformat"}, "key": "date"} 11 | } 12 | 13 | p120_kwargs = {"conf": p120_conf, "inputs": p120_inputs, "assign": "format"} 14 | 15 | 16 | def pipe(test=False): 17 | s1 = SyncPipe("input", test=test, **p120_kwargs).output 18 | s2 = ( 19 | SyncPipe("input", conf=p112_conf, test=test) 20 | .dateformat(conf=p151_conf, format=s1) 21 | .output 22 | ) 23 | 24 | stream = SyncPipe("itembuilder", conf=p100_conf, value=s2, test=test).list 25 | 26 | for i in stream: 27 | pprint(i) 28 | 29 | return stream 30 | 31 | 32 | @coroutine 33 | def async_pipe(reactor, test=False): 34 | s1 = yield 
AsyncPipe("input", test=test, **p120_kwargs).output 35 | s2 = yield ( 36 | AsyncPipe("input", conf=p112_conf, test=test) 37 | .dateformat(conf=p151_conf, format=s1) 38 | .output 39 | ) 40 | 41 | output_kwargs = {"conf": p100_conf, "value": s2, "test": test} 42 | output = yield (AsyncPipe("itembuilder", **output_kwargs).list) 43 | 44 | for i in output: 45 | pprint(i) 46 | -------------------------------------------------------------------------------- /helpers/check-stage: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | # 4 | # A script to disallow syntax errors to be committed 5 | # by running a checker (lint, pep8, pylint...) on them 6 | # 7 | # to install type ln -s check-stage .git/hooks/pre-commit 8 | 9 | # Redirect output to stderr. 10 | exec 2>&1 11 | 12 | # set path (necessary for gitx and git-gui) 13 | export PATH=$PATH:/opt/local/bin:/opt/local/sbin:/usr/local/sbin:/usr/local/bin 14 | 15 | # necessary check for initial commit 16 | if [ git rev-parse --verify HEAD >/dev/null 2>&1 ]; then 17 | against=HEAD 18 | else 19 | # Initial commit: diff against an empty tree object 20 | against=4b825dc642cb6eb9a060e54bf8d69288fbee4904 21 | fi 22 | 23 | # set Internal Field Separator to newline (dash does not support $'\n') 24 | IFS=' 25 | ' 26 | 27 | # get a list of staged files 28 | for LINE in $(git diff-index --cached --full-index $against); do 29 | SHA=$(echo $LINE | cut -d' ' -f4) 30 | STATUS=$(echo $LINE | cut -d' ' -f5 | cut -d' ' -f1) 31 | FILENAME=$(echo $LINE | cut -d' ' -f5 | cut -d' ' -f2) 32 | FILEEXT=$(echo $FILENAME | sed 's/^.*\.//') 33 | 34 | # do not check deleted files 35 | if [ $STATUS == "D" ]; then 36 | continue 37 | fi 38 | 39 | # only check files with proper extension 40 | if [ $FILEEXT == 'php' ]; then 41 | PROGRAMS='php' 42 | COMMANDS='php -l' 43 | elif [ $FILEEXT == 'py' ]; then 44 | PROGRAMS=$'pep8\npylint' 45 | COMMANDS=$'pep8 --ignore=W191,E128' 46 | 
else 47 | continue 48 | fi 49 | 50 | for PROGRAM in $PROGRAMS; do 51 | test $(which $PROGRAM) 52 | 53 | if [ $? != 0 ]; then 54 | echo "$PROGRAM binary does not exist or is not in path" 55 | exit 1 56 | fi 57 | done 58 | 59 | # check the staged content for syntax errors 60 | for COMMAND in $COMMANDS; do 61 | git cat-file -p $SHA > tmp.txt 62 | RESULT=$(eval "$COMMAND tmp.txt") 63 | 64 | if [ $? != 0 ]; then 65 | echo "$COMMAND syntax check failed on $FILENAME" 66 | for LINE in $RESULT; do echo $LINE; done 67 | rm tmp.txt 68 | exit 1 69 | fi 70 | done 71 | done 72 | 73 | unset IFS 74 | rm tmp.txt 75 | 76 | # If there are whitespace errors, print the offending file names and fail. 77 | # exec git diff-index --check --cached $against -- 78 | -------------------------------------------------------------------------------- /helpers/clean: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # remove build artifacts 5 | rm -fr build/ 6 | rm -fr dist/ 7 | rm -fr *.egg-info 8 | 9 | # remove Python file artifacts 10 | find . -name '*.pyc' -exec rm -f {} + 11 | find . -name '*.pyo' -exec rm -f {} + 12 | find . 
-name '*~' -exec rm -f {} + 13 | -------------------------------------------------------------------------------- /helpers/pippy: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh -u 2 | # 3 | 4 | cecho(){ 5 | GREEN="\033[0;32m" 6 | YELLOW="\033[1;33m" 7 | RED="\033[0;31m" 8 | NO_COLOR="\033[0m" 9 | printf "${!1}${2} ${NO_COLOR}\n" 10 | } 11 | 12 | beginswith() { case $2 in "$1"*) true;; *) false;; esac; } 13 | 14 | _install () { 15 | package=$1 16 | 17 | if beginswith -e "$package"; then 18 | cecho "GREEN" "installing $package normally…" 19 | pip install "$package" 20 | else 21 | cecho "GREEN" "installing $package from ${PIP_WHEEL_DIR}…" 22 | pip install --no-index --only-binary=:all: --find-links="$PIP_WHEEL_DIR" "$package" 23 | 24 | if [ $? -eq 1 ]; then 25 | cecho "YELLOW" "$package not found in cache, downloading…" 26 | 27 | if beginswith wheel "$package" || beginswith pip "$package"; then 28 | pip install -U "$package" 29 | else 30 | pip download --dest="$PIP_CACHE_DIR" "$package" 31 | pip wheel --no-index --find-links="$PIP_CACHE_DIR" --wheel-dir="$PIP_WHEEL_DIR" "$package" 32 | pip install --no-index --only-binary=:all: --find-links="$PIP_WHEEL_DIR" "$package" 33 | fi 34 | fi 35 | 36 | if [ $? 
-eq 1 ]; then 37 | cecho "RED" "$package has no wheel, installing normally…" 38 | pip install "$package" 39 | fi 40 | fi 41 | } 42 | 43 | parse () { 44 | file=$1 45 | 46 | if beginswith -r "$file"; then 47 | REQUIREMENTS=`echo "$file" | sed 's/^-r\s*//'` 48 | 49 | while read line; do 50 | parse $line 51 | done < "$REQUIREMENTS" 52 | else 53 | echo "$file" 54 | fi 55 | } 56 | 57 | for package in `parse "$@"`; do 58 | cecho "GREEN" "$package" 59 | _install "$package" 60 | done 61 | 62 | -------------------------------------------------------------------------------- /helpers/srcdist: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # create a source distribution package 5 | 6 | python setup.py sdist 7 | gpg --detach-sign -a dist/*.tar.gz 8 | -------------------------------------------------------------------------------- /helpers/wheel: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # -*- coding: utf-8 -*- 3 | 4 | # create a wheel package 5 | 6 | python setup.py bdist_wheel 7 | gpg --detach-sign -a dist/*.whl 8 | -------------------------------------------------------------------------------- /manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # vim: sw=4:ts=4:expandtab 4 | 5 | """ A script to manage development tasks """ 6 | from os import path as p 7 | from subprocess import call, check_call, CalledProcessError 8 | from manager import Manager 9 | 10 | manager = Manager() 11 | BASEDIR = p.dirname(__file__) 12 | DEF_WHERE = ["riko", "tests", "examples", "setup.py", "manage.py"] 13 | 14 | 15 | def _upload(): 16 | """Upload distribution files""" 17 | _uploaddir = p.join(BASEDIR, "dist", "*") 18 | url = "https://upload.pypi.org/legacy/" 19 | check_call(["twine", "upload", "--repository-url", url, _uploaddir]) 20 | 21 | 22 
def _sdist():
    """Create a source distribution package via the helper script."""
    check_call(p.join(BASEDIR, "helpers", "srcdist"))


def _wheel():
    """Create a wheel package via the helper script."""
    check_call(p.join(BASEDIR, "helpers", "wheel"))


def _clean():
    """Remove Python file and build artifacts via the helper script."""
    check_call(p.join(BASEDIR, "helpers", "clean"))


@manager.command
def check():
    """Check staged changes for lint errors"""
    exit(call(p.join(BASEDIR, "helpers", "check-stage")))


@manager.arg("where", "w", help="Modules to check")
@manager.arg("strict", "s", help="Check with pylint")
@manager.command
def lint(where=None, strict=False):
    """Check style with linters"""
    extra = where.split(" ") if where else DEF_WHERE
    args = ["pylint", "--rcfile=tests/pylintrc", "-rn", "-fparseable"]

    try:
        if strict:
            check_call(args + extra)
        else:
            check_call(["flake8"] + extra)
    except CalledProcessError as e:
        exit(e.returncode)


@manager.arg("where", "w", help="Modules to check")
@manager.command
def prettify(where=None):
    """Prettify code with black"""
    extra = where.split(" ") if where else DEF_WHERE

    try:
        check_call(["black"] + extra)
    except CalledProcessError as e:
        exit(e.returncode)


@manager.command
def require():
    """Create requirements.txt"""
    # Shell pipeline (freeze minus dev requirements), so shell=True is required
    cmd = "pip freeze -l | grep -vxFf dev-requirements.txt > requirements.txt"
    exit(call(cmd, shell=True))


@manager.arg("where", "w", help="test path", default=None)
@manager.arg("stop", "x", help="Stop after first error", type=bool, default=False)
@manager.arg("failed", "f", help="Run failed tests", type=bool, default=False)
@manager.arg("cover", "c", help="Add coverage report", type=bool, default=False)
@manager.arg("tox", "t", help="Run tox tests", type=bool, default=False)
@manager.arg("detox", "d", help="Run detox tests", type=bool, default=False)
@manager.arg("verbose", "v", help="Use detailed errors", type=bool, default=False)
@manager.arg(
    "parallel",
    "p",
    help="Run tests in parallel in multiple processes",
    type=bool,
    default=False,
)
@manager.arg("debug", "D", help="Use nose.loader debugger", type=bool, default=False)
@manager.command
def test(where=None, stop=None, **kwargs):
    """Run nose, tox, and script tests"""
    # Build the nosetests option string flag by flag from the parsed args
    opts = "-xv" if stop else "-v"
    opts += " --with-coverage" if kwargs.get("cover") else ""
    opts += " --failed" if kwargs.get("failed") else " --with-id"
    opts += " --processes=-1" if kwargs.get("parallel") else ""
    opts += " --detailed-errors" if kwargs.get("verbose") else ""
    opts += " --debug=nose.loader" if kwargs.get("debug") else ""
    opts += " -w %s" % where if where else ""

    try:
        if kwargs.get("tox"):
            check_call("tox")
        elif kwargs.get("detox"):
            check_call("detox")
        else:
            check_call(("nosetests %s" % opts).split(" "))
    except CalledProcessError as e:
        exit(e.returncode)


@manager.command
def register():
    """Register package with PyPI"""
    # BUG FIX: subprocess.call expects the whole command as one argv list.
    # Passing the path as a second positional argument sent it to call()'s
    # ``bufsize`` parameter (a TypeError at runtime) instead of running
    # ``python setup.py register``.
    exit(call(["python", p.join(BASEDIR, "setup.py"), "register"]))


@manager.command
def release():
    """Package and upload a release"""
    try:
        _clean()
        _sdist()
        _wheel()
        _upload()
    except CalledProcessError as e:
        exit(e.returncode)


@manager.command
def build():
    """Create a source distribution and wheel package"""
    try:
        _clean()
        _sdist()
        _wheel()
    except CalledProcessError as e:
        exit(e.returncode)


@manager.command
def upload():
    """Upload distribution files"""
    try:
        _upload()
    except CalledProcessError as e:
        exit(e.returncode)
152 | 153 | 154 | @manager.command 155 | def sdist(): 156 | """Create a source distribution package""" 157 | try: 158 | _sdist() 159 | except CalledProcessError as e: 160 | exit(e.returncode) 161 | 162 | 163 | @manager.command 164 | def wheel(): 165 | """Create a wheel package""" 166 | try: 167 | _wheel() 168 | except CalledProcessError as e: 169 | exit(e.returncode) 170 | 171 | 172 | @manager.command 173 | def clean(): 174 | """Remove Python file and build artifacts""" 175 | try: 176 | _clean() 177 | except CalledProcessError as e: 178 | exit(e.returncode) 179 | 180 | 181 | if __name__ == "__main__": 182 | manager.main() 183 | -------------------------------------------------------------------------------- /optional-requirements.txt: -------------------------------------------------------------------------------- 1 | lxml>=4.5.0,<5.0.0 2 | treq>=18.6.0,<22.0.0 3 | Twisted>=19.10.0,<22.0.0 4 | speedparser3~=0.3.1 5 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=42.0.2", "pkutils>=3.0.0,<4.0.0"] 3 | build-backend = "setuptools.build_meta" 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Babel>=2.8.0,<3.0.0 2 | chardet>=3.0.4,<4.0.0 3 | feedparser>=5.2.1,<7.0.0 4 | meza>=0.42.5,<1.0.0 5 | Mezmorize>=0.27.0,<1.0.0 6 | python-dateutil>=2.8.1,<3.0.0 7 | pygogo>=0.12.0,<2.0.0 8 | requests>=2.22.0,<3.0.0 9 | html5lib>=1.0.1,<2.0.0 10 | pytz>=2019.3 11 | -------------------------------------------------------------------------------- /riko/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # vim: sw=4:ts=4:expandtab 3 | """ 4 | riko 5 | ~~~~ 6 | Provides functions for analyzing and 
processing streams of structured data 7 | 8 | Examples: 9 | basic usage:: 10 | 11 | >>> from itertools import chain 12 | >>> from functools import partial 13 | >>> from riko.modules import itembuilder, strreplace 14 | >>> from riko.collections import SyncPipe 15 | >>> 16 | >>> ib_conf = { 17 | ... 'attrs': [ 18 | ... {'key': 'link', 'value': 'www.google.com', }, 19 | ... {'key': 'title', 'value': 'google', }, 20 | ... {'key': 'author', 'value': 'Tommy'}]} 21 | >>> 22 | >>> sr_conf = { 23 | ... 'rule': [{'find': 'Tom', 'param': 'first', 'replace': 'Tim'}]} 24 | >>> 25 | >>> items = itembuilder.pipe(conf=ib_conf) 26 | >>> pipe = partial(strreplace.pipe, conf=sr_conf, field='author') 27 | >>> replaced = map(pipe, items) 28 | >>> next(chain.from_iterable(replaced)) == { 29 | ... 'link': 'www.google.com', 'title': 'google', 30 | ... 'strreplace': 'Timmy', 'author': 'Tommy'} 31 | True 32 | """ 33 | from os import path as p 34 | 35 | __version__ = "0.67.0" 36 | 37 | __title__ = "riko" 38 | __package_name__ = "riko" 39 | __author__ = "Reuben Cummings" 40 | __description__ = "A stream processing engine modeled after Yahoo! Pipes." 
__email__ = "reubano@gmail.com"
__license__ = "MIT"
__copyright__ = "Copyright 2015 Reuben Cummings"

# Absolute path of the installed riko package; used to locate bundled data files
PARENT_DIR = p.abspath(p.dirname(__file__))
ENCODING = "utf-8"


def get_path(name):
    """Return a ``file://`` url for a data file bundled with riko.

    ``name`` is a bare filename (e.g. ``'feed.xml'``) resolved against
    ``riko/data``.
    """
    return "file://%s" % p.join(PARENT_DIR, "data", name)
--------------------------------------------------------------------------------
/riko/autorss.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# vim: sw=4:ts=4:expandtab
"""
riko.autorss
~~~~~~~~~~~~
Provides functions for finding RSS feeds from a site's LINK tags
"""
import pygogo as gogo

from itertools import chain
from html.parser import HTMLParser

from meza.compat import decode
from riko.utils import fetch
from riko.bado import coroutine, return_value, microdom
from riko.bado.io import async_url_open

# Seconds allowed for fetching a url before giving up
TIMEOUT = 10
logger = gogo.Gogo(__name__, monolog=True).logger


class LinkParser(HTMLParser):
    """HTML parser that collects <link rel="alternate"> / rss-typed tags.

    Matching tags are accumulated on ``self.entry``, an iterator of attribute
    dicts (each given extra ``link`` and ``tag`` keys).
    """

    def reset(self):
        # Called by HTMLParser.__init__ too, so ``entry`` is always defined
        HTMLParser.reset(self)
        self.entry = iter([])

    def handle_starttag(self, tag, attrs):
        entry = dict(attrs)
        alternate = entry.get("rel") == "alternate"
        rss = "rss" in entry.get("type", "")

        if (alternate or rss) and "href" in entry:
            entry["link"] = entry["href"]
            entry["tag"] = tag
            # Append lazily instead of keeping a list; consumers drain the chain
            self.entry = chain(self.entry, [entry])


def file2entries(f, parser):
    """Feed a line iterable through ``parser`` and yield found feed entries."""
    for line in f:
        parser.feed(decode(line))

        # Drain entries discovered so far; parser.entry is re-chained on match
        for entry in parser.entry:
            yield entry


def doc2entries(document):
    """Recursively yield feed-link entries from a microdom document tree."""
    for node in document.childNodes:
        if hasattr(node, "attributes") and node.attributes:
            entry = node.attributes
            alternate = entry.get("rel") == "alternate"
            rss = "rss" in entry.get("type", "")
        else:
            alternate = rss = None

        if (alternate or rss) and "href" in entry:
            entry["link"] = entry["href"]
            entry["tag"] = node.nodeName
            yield entry

    # Second pass: recurse into children after yielding this level's entries
    for node in document.childNodes:
        for entry in doc2entries(node):
            yield entry


@coroutine
def async_get_rss(url, convert_charrefs=False):
    """Asynchronously find RSS links at ``url`` (or in raw markup text).

    NOTE(review): ``convert_charrefs`` is accepted for signature parity with
    ``get_rss`` but unused here — microdom does the parsing, not LinkParser.
    """
    try:
        f = yield async_url_open(url, timeout=TIMEOUT)
    except ValueError:
        # ``url`` wasn't a url at all; treat it as inline markup text
        f = filter(None, url.splitlines())

    document = microdom.parse(f, lenient=True)
    return_value(doc2entries(document))


def get_rss(url, convert_charrefs=False):
    """Find RSS links at ``url`` (or in raw markup text) synchronously."""
    try:
        parser = LinkParser(convert_charrefs=convert_charrefs)
    except TypeError:
        # Older HTMLParser signatures don't accept convert_charrefs
        parser = LinkParser()

    try:
        f = fetch(url, timeout=TIMEOUT)
    except ValueError:
        # ``url`` wasn't a url at all; treat it as inline markup text
        f = filter(None, url.splitlines())

    return file2entries(f, parser)
--------------------------------------------------------------------------------
/riko/bado/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# vim: sw=4:ts=4:expandtab
"""
riko.bado
~~~~~~~~~
Provides functions for creating asynchronous riko pipes

Examples:
    basic usage::

        >>> from riko import get_path
        >>> from riko.bado import react
"""

# Twisted is optional: if it isn't installed, stub out the async primitives
# with no-op lambdas so the rest of riko can still be imported synchronously.
try:
    from twisted.internet.task import react
except ImportError:
    react = lambda _, _reactor=None: None
    inlineCallbacks = lambda _: lambda: None
    returnValue = lambda _: lambda: None
    backend = "empty"
else:
    from twisted.internet.defer import inlineCallbacks
    from twisted.internet.defer import returnValue

    backend = "twisted"


class Reactor(object):
    # ``fake`` is flipped to True by riko.bado.mock.FakeReactor during tests
    fake = False


reactor = Reactor()
# Public aliases: riko code imports these names rather than twisted's directly
coroutine = inlineCallbacks
return_value = returnValue
_issync = backend == "empty"
_isasync = not _issync
--------------------------------------------------------------------------------
/riko/bado/io.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# vim: sw=4:ts=4:expandtab
"""
riko.bado.io
~~~~~~~~~~~~
Provides functions for asynchronously reading files and urls

Examples:
    basic usage::

        >>> from riko import get_path
        >>> from riko.bado.io import async_url_open
"""
import pygogo as gogo

from io import open
from tempfile import NamedTemporaryFile
from os import remove

from meza.compat import encode

from . import coroutine, return_value

# Twisted is optional: fall back to a bare ``object`` base class so this
# module stays importable; the async helpers below are then unusable.
try:
    from twisted.test.proto_helpers import AccumulatingProtocol
except ImportError:
    AccumulatingProtocol = object
else:
    from twisted.internet.reactor import callLater
    from twisted.protocols.basic import FileSender
    from twisted.web.client import getPage, downloadPage
    from twisted.test.proto_helpers import StringTransport

logger = gogo.Gogo(__name__, monolog=True).logger


# http://stackoverflow.com/q/26314586/408556
# http://stackoverflow.com/q/8157197/408556
# http://stackoverflow.com/a/33708936/408556
class FileReader(AccumulatingProtocol):
    """Protocol that pushes a local file through its transport via FileSender.

    ``connectionMade`` starts the transfer and drives it to completion; the
    result is available on the deferred ``self.d``.
    """

    def __init__(self, filename, transform=None, delay=0, verbose=False):
        # ``transform`` is forwarded to FileSender.beginFileTransfer (applied
        # per chunk); ``delay`` postpones the completion callback.
        self.f = open(filename, "rb")
        self.transform = transform
        self.delay = delay
        self.producer = FileSender()
        self.logger = gogo.Gogo(__name__, verbose=verbose).logger

    def cleanup(self, *args):
        # Teardown used both on success (addBoth) and on connection loss
        self.f.close()
        self.producer.stopProducing()

    def resumeProducing(self):
        # NOTE(review): this method reads ``self.file``, ``self.CHUNK_SIZE``,
        # ``self.consumer``, ``self.deferred`` and ``self.lastSent``, none of
        # which are assigned in this class — they presumably come from the
        # FileSender/consumer machinery; confirm before relying on this path.
        chunk = self.file.read(self.CHUNK_SIZE) if self.file else ""

        if not chunk:
            self.file = None
            self.consumer.unregisterProducer()

            if self.deferred and self.delay:
                # Optionally delay completion (e.g. to simulate slow I/O)
                callLater(self.delay, self.deferred.callback, self.lastSent)
            elif self.deferred:
                self.deferred.callback(self.lastSent)

            self.deferred = None
            return

    def connectionLost(self, reason):
        self.logger.debug("connectionLost: %s", reason)
        self.cleanup()

    def connectionMade(self):
        self.logger.debug("Connection made from %s", self.transport.getPeer())
        args = (self.f, self.transport, self.transform)
        self.d = self.closedDeferred = self.producer.beginFileTransfer(*args)

        # Pump the producer synchronously until the transfer deferred fires
        while not self.d.called:
            self.producer.resumeProducing()

        self.d.addErrback(self.logger.error)
        self.d.addBoth(self.cleanup)


@coroutine
def async_read_file(filename, transport, protocol=FileReader, **kwargs):
    """Read a local file through ``transport``; resolves to its raw contents.

    A leading ``file://`` scheme in ``filename`` is stripped before opening.
    """
    proto = protocol(filename.replace("file://", ""), **kwargs)
    proto.makeConnection(transport)
    yield proto.d
    # return_value(proto.data)
    return_value(proto.transport.value())


@coroutine
def async_get_file(filename, transport, protocol=FileReader, **kwargs):
    """Read a local file; resolves to the transport's file-like buffer."""
    proto = protocol(filename.replace("file://", ""), **kwargs)
    proto.makeConnection(transport)
    yield proto.d
    # Rewind so callers can read the buffer from the beginning
    proto.transport.io.seek(0)
    return_value(proto.transport.io)


@coroutine
def async_url_open(url, timeout=0, **kwargs):
    """Open a url (http(s) or local/file path); resolves to a readable object.

    Remote urls are downloaded to a temporary file first, then read back
    through the local-file path; the temp file is removed afterwards.
    """
    if url.startswith("http"):
        page = NamedTemporaryFile(delete=False)
        new_url = page.name
        yield downloadPage(encode(url), page, timeout=timeout)
    else:
        page, new_url = None, url

    f = yield async_get_file(new_url, StringTransport(), **kwargs)

    # StringTransport buffers lack a ``name``; reconstruct it for file:// urls
    if not hasattr(f, "name") and url.startswith("file"):
        f.name = url.split("://")[1]

    if page:
        page.close()
        remove(page.name)

    return_value(f)


def async_url_read(url, timeout=0, **kwargs):
    """Return a deferred firing with the contents of a url or local file."""
    if url.startswith("http"):
        content = getPage(encode(url), timeout=timeout)
    else:
        content = async_read_file(url, StringTransport(), **kwargs)

    return content
-------------------------------------------------------------------------------- /riko/bado/itertools.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # vim: sw=4:ts=4:expandtab 3 | """ 4 | riko.bado.itertools 5 | ~~~~~~~~~~~~~~~~~~~ 6 | Provides asynchronous ports of various builtin itertools functions 7 | 8 | Examples: 9 | basic usage:: 10 | 11 | >>> from riko import get_path 12 | >>> from riko.bado.itertools import coop_reduce 13 | """ 14 | from functools import partial 15 | 16 | import itertools as it 17 | 18 | from . import coroutine, return_value, reactor 19 | from .mock import FakeReactor 20 | 21 | try: 22 | from twisted.internet.task import Cooperator 23 | except ImportError: 24 | pass 25 | else: 26 | from twisted.internet import task as real_task 27 | from twisted.internet.defer import gatherResults 28 | 29 | 30 | def get_task(): 31 | if reactor.fake: 32 | task = Cooperator( 33 | scheduler=partial(FakeReactor().callLater, FakeReactor._DELAY) 34 | ) 35 | else: 36 | task = real_task.Cooperator() 37 | 38 | return task 39 | 40 | 41 | @coroutine 42 | def coop_reduce(func, iterable, initializer=None): 43 | task = get_task() 44 | iterable = iter(iterable) 45 | x = initializer or next(iterable) 46 | result = {} 47 | 48 | def work(func, it, x): 49 | for y in it: 50 | result["value"] = x = func(x, y) 51 | yield 52 | 53 | _task = task.cooperate(work(func, iterable, x)) 54 | yield _task.whenDone() 55 | return_value(result["value"]) 56 | 57 | 58 | def async_reduce(async_func, iterable, initializer=None): 59 | it = iter(iterable) 60 | x = initializer or next(it) 61 | 62 | @coroutine 63 | def work(async_func, it, x): 64 | for y in it: 65 | x = yield async_func(x, y) 66 | 67 | return_value(x) 68 | 69 | return work(async_func, it, x) 70 | 71 | 72 | @coroutine 73 | def async_map(async_func, iterable, connections=0): 74 | """parallel map for deferred callables using cooperative multitasking 75 | 
http://stackoverflow.com/a/20376166/408556 76 | """ 77 | if connections and not reactor.fake: 78 | results = [] 79 | work = (async_func(x).addCallback(results.append) for x in iterable) 80 | deferreds = [get_task().coiterate(work) for _ in range(connections)] 81 | yield gatherResults(deferreds, consumeErrors=True) 82 | else: 83 | deferreds = map(async_func, iterable) 84 | results = yield gatherResults(deferreds, consumeErrors=True) 85 | 86 | return_value(results) 87 | 88 | 89 | def async_starmap(async_func, iterable): 90 | """itertools.starmap for deferred callables""" 91 | deferreds = it.starmap(async_func, iterable) 92 | return gatherResults(deferreds, consumeErrors=True) 93 | 94 | 95 | def async_dispatch(split, *async_funcs, **kwargs): 96 | return async_starmap(lambda item, f: f(item), zip(split, async_funcs)) 97 | 98 | 99 | def async_broadcast(item, *async_funcs, **kwargs): 100 | return async_dispatch(it.repeat(item), *async_funcs, **kwargs) 101 | -------------------------------------------------------------------------------- /riko/bado/mock.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # vim: sw=4:ts=4:expandtab 3 | """ 4 | riko.bado.mock 5 | ~~~~~~~~~~~~~~ 6 | Provides classes for mocking a reactor during tests 7 | 8 | Examples: 9 | basic usage:: 10 | 11 | >>> from riko import get_path 12 | >>> from riko.bado.mock import FakeReactor 13 | """ 14 | import pygogo as gogo 15 | 16 | from . import reactor 17 | 18 | try: 19 | from twisted.test.proto_helpers import MemoryReactorClock 20 | except ImportError: 21 | MemoryReactorClock = object 22 | FakeReactor = lambda _: lambda: None 23 | 24 | logger = gogo.Gogo(__name__, monolog=True).logger 25 | 26 | 27 | class FakeReactor(MemoryReactorClock): 28 | """A fake reactor to be used in tests. This reactor doesn't actually do 29 | much that's useful yet. It accepts TCP connection setup attempts, but 30 | they will never succeed. 
31 | 32 | Examples: 33 | >>> import sys 34 | >>> 35 | >>> try: 36 | ... from twisted import internet 37 | ... except ImportError: 38 | ... pass 39 | ... else: 40 | ... from twisted.internet.fdesc import readFromFD, setNonBlocking 41 | ... FileDescriptor = internet.abstract.FileDescriptor 42 | ... 43 | ... reactor = FakeReactor() 44 | ... f = FileDescriptor(reactor) 45 | ... f.fileno = sys.__stdout__.fileno 46 | ... fd = f.fileno() 47 | ... setNonBlocking(fd) 48 | ... readFromFD(fd, print) 49 | """ 50 | 51 | _DELAY = 1 52 | 53 | def __init__(self): 54 | super(FakeReactor, self).__init__() 55 | reactor.fake = True 56 | msg = "Attention! Running fake reactor." 57 | logger.debug(f"{msg} Some deferreds may not work as intended.") 58 | 59 | def callLater(self, when, what, *args, **kwargs): 60 | """Schedule a unit of work to be done later.""" 61 | delayed = super(FakeReactor, self).callLater(when, what, *args, **kwargs) 62 | self.pump() 63 | return delayed 64 | 65 | def pump(self): 66 | """Perform scheduled work""" 67 | self.advance(self._DELAY) 68 | -------------------------------------------------------------------------------- /riko/bado/requests.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # vim: sw=4:ts=4:expandtab 3 | """ 4 | riko.bado.requests 5 | ~~~~~~~~~~~~~~~~~~ 6 | Provides functions for asynchronously fetching web pages 7 | 8 | Examples: 9 | basic usage:: 10 | 11 | >>> from riko import get_path 12 | >>> from riko.bado import requests as treq 13 | """ 14 | 15 | try: 16 | import treq 17 | except ImportError: 18 | get = lambda _: lambda: None 19 | json_content = lambda _: lambda: None 20 | else: 21 | get = treq.get 22 | json = treq.json_content 23 | -------------------------------------------------------------------------------- /riko/bado/util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # vim: sw=4:ts=4:expandtab 3 | 
""" 4 | riko.bado.util 5 | ~~~~~~~~~~~~~~ 6 | Provides functions for creating asynchronous riko pipes 7 | 8 | Examples: 9 | basic usage:: 10 | 11 | >>> from riko import get_path 12 | >>> from riko.bado.util import xml2etree 13 | """ 14 | from os import environ 15 | from sys import executable 16 | from functools import partial 17 | 18 | from riko.parsers import _make_content, entity2text 19 | 20 | try: 21 | from twisted.internet.defer import maybeDeferred, Deferred 22 | except ImportError: 23 | maybeDeferred = lambda *args: None 24 | else: 25 | from twisted.internet import defer 26 | from twisted.internet.utils import getProcessOutput 27 | from twisted.internet.reactor import callLater 28 | 29 | from . import microdom 30 | from .microdom import EntityReference 31 | 32 | async_none = defer.succeed(None) 33 | async_return = partial(defer.succeed) 34 | async_partial = lambda f, **kwargs: partial(maybeDeferred, f, **kwargs) 35 | 36 | 37 | def async_sleep(seconds): 38 | d = Deferred() 39 | callLater(seconds, d.callback, None) 40 | return d 41 | 42 | 43 | def defer_to_process(command): 44 | return getProcessOutput(executable, ["-c", command], environ) 45 | 46 | 47 | def xml2etree(f, xml=True): 48 | readable = hasattr(f, "read") 49 | 50 | if xml and readable: 51 | parse = microdom.parseXML 52 | elif readable: 53 | parse = partial(microdom.parse, lenient=True) 54 | elif xml: 55 | parse = microdom.parseXMLString 56 | else: 57 | parse = partial(microdom.parseString, lenient=True) 58 | 59 | return parse(f) 60 | 61 | 62 | def etree2dict(element, tag="content"): 63 | """Convert a microdom element tree into a dict imitating how Yahoo Pipes 64 | does it. 
65 | 66 | TODO: checkout twisted.words.xish 67 | """ 68 | i = dict(element.attributes) if hasattr(element, "attributes") else {} 69 | value = element.nodeValue if hasattr(element, "nodeValue") else None 70 | 71 | if isinstance(element, EntityReference): 72 | value = entity2text(value) 73 | 74 | i.update(_make_content(i, value, tag)) 75 | 76 | for child in element.childNodes: 77 | tag = child.tagName if hasattr(child, "tagName") else "content" 78 | value = etree2dict(child, tag) 79 | 80 | # try to join the content first since microdom likes to split up 81 | # elements that contain a mix of text and entity reference 82 | try: 83 | i.update(_make_content(i, value, tag, append=False)) 84 | except TypeError: 85 | i.update(_make_content(i, value, tag)) 86 | 87 | if ("content" in i) and not set(i).difference(["content"]): 88 | # element is leaf node and doesn't have attributes 89 | i = i["content"] 90 | 91 | return i 92 | -------------------------------------------------------------------------------- /riko/data/capnorth.xml: -------------------------------------------------------------------------------- 1 | 2 | PAAQ3703390999 3 | PAAQ@nwws.oes.ca.gov 4 | 2007-04-01T20:32:26-07:00 5 | Actual 6 | Alert 7 | Public 8 | WEPA41 PAAQ 020332 9 | TSUWCA 10 | 11 | Geo 12 | 13 | Future 14 | Severe 15 | Possible 16 | 2007-04-02T00:32:26-07:00 17 | 18 | THIS TSUNAMI ADVISORY IS FOR ALASKA/ BRITISH COLUMBIA/ WASHINGTON/ OREGON 19 | AND CALIFORNIA ONLY 20 | 21 | NO - REPEAT NO - WATCH OR WARNING IS IN EFFECT FOR THE STATES 22 | AND PROVINCES LISTED ABOVE. 23 | A TSUNAMI HAS BEEN OBSERVED AT THE FOLLOWING SITES 24 | LOCATION LAT LON TIME AMPL 25 | ------------------------ ----- ------ ------- ----------- 26 | HONIARA SOLOMAN ISLAND 9.4S 159.9E 2252UTC 0.21M/0.7FT 27 | PORT VILA VANUATU 17.8S 168.3E 2327UTC 0.12M/0.4FT 28 | MANUS PAPUA NEW GUINEA 2.0S 147.4E 0017UTC 0.09M/0.3FT 29 | CAPE FERGUSON AUST. 
19.3S 147.1E 0135UTC 0.11M/0.4FT 30 | THE TSUNAMI HAS NOT BEEN OBSERVED ON TIDE GAGES TO THE NORTH 31 | OF THE SOLOMON ISLANDS. FORECAST MODELS INDICATE THE TSUNAMI 32 | ENERGY WILL BE MAINLY CONTAINED TO THE SOUTH OF THE SOLOMON 33 | ISLANDS. THE PACIFIC TSUNAMI WARNING CENTER HAS RECEIVED REPORTS 34 | OF TSUNAMI-RELATED FATALITIES IN SE PAPUA NG AND THE SOLOMON IS. 35 | TIME - TIME OF MEASUREMENT 36 | AMPL - TSUNAMI AMPLITUDES ARE MEASURED RELATIVE TO NORMAL SEA 37 | LEVEL. THESE ARE NOT CREST-TO-TROUGH HEIGHTS. 38 | EVALUATION 39 | BASED ON THE LOCATION - MAGNITUDE AND HISTORIC TSUNAMI RECORDS 40 | THE EARTHQUAKE WAS NOT SUFFICIENT TO GENERATE A TSUNAMI DAMAGING 41 | TO CALIFORNIA/ OREGON/ WASHINGTON/ BRITISH COLUMBIA OR ALASKA. 42 | SOME OF THESE AREAS MAY EXPERIENCE NON-DAMAGING SEA LEVEL CHANGES. 43 | PRELIMINARY EARTHQUAKE PARAMETERS 44 | MAGNITUDE - 8.1 45 | TIME - 1240 AKDT APR 01 2007 46 | 1340 PDT APR 01 2007 47 | 2040 UTC APR 01 2007 48 | LOCATION - 8.6 SOUTH 157.2 EAST 49 | - SOLOMON ISLANDS 50 | DEPTH - 17 MILES/28 KM 51 | THE PACIFIC TSUNAMI WARNING CENTER IN EWA BEACH HAWAII HAS 52 | ISSUED A TSUNAMI WARNING FOR AREAS OF THE PACIFIC OUTSIDE OF 53 | CALIFORNIA/ OREGON/ WASHINGTON/ BRITISH COLUMBIA AND ALASKA. 54 | TSUNAMI ADVISORIES ARE ISSUED TO REGIONS NOT PRESENTLY IN A 55 | TSUNAMI WARNING OR WATCH WHEN A TSUNAMI WARNING HAS BEEN ISSUED 56 | FOR OTHER AREAS OF THE PACIFIC. NO TSUNAMI WARNING OR WATCH 57 | IS IN EFFECT FOR CALIFORNIA/ OREGON/ WASHINGTON/ BRITISH 58 | COLUMBIA AND ALASKA. 
59 | 60 | 61 | route 62 | NCS-NORTH.localhost.localdomain;capserverNorth 63 | 64 | 65 | TSUNAMI MESSAGE NUMBER 7 NWS WEST COAST/ALASKA TSUNAMI WARNING CENTER PALMER AK 66 | 41.857864,-124.45808 41.99844,-124.16689 41.9884,-123.98616 41.797623,-123.98616 41.4964,-123.88574 41.024487,-123.84558 41.064648,-123.88574 40.422043,-124.15685 39.79952,-123.654816 39.036423,-123.52428 38.49423,-122.982086 38.18296,-122.67082 38.29341,-122.38969 38.223125,-122.11858 37.881744,-122.178825 37.881744,-122.178825 37.40983,-121.87762 37.40983,-121.87762 37.23914,-122.05834 36.978077,-121.606514 36.43588,-121.646675 35.200867,-120.48195 34.64863,-120.40163 34.5181,-119.50799 34.176716,-118.45373 33.915657,-118.29307 33.75501,-117.80107 33.092316,-117.1384 32.540077,-117.00786 32.509956,-118.16254 33.1124,-119.62849 33.98594,-120.55223 33.98594,-120.55223 37.751213,-123.34355 40.341717,-124.59864 41.857864,-124.45808 67 | 68 | UGC 69 | PKZ176-175-170>172-155-150-132-136>138-141-140-120-121-125>130- 051>053-041>043-011>013-021-022-031>036-PZZ130>135-150-153-156- 110-250-210-255-350-353-356-450-455-550-530-535-555-670-673-650- 655-750-AKZ191-185-181-171-145-111-101-121-125-131-135-017>029- WAZ001-503-506>511-514>517-021-ORZ001-002-021-022-CAZ001-002- 505-506-006-508-509-514-515-009-034-035-039>046-087-020502- 70 | 71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /riko/data/currencies.csv: -------------------------------------------------------------------------------- 1 | code,location 2 | AED,United Arab Emirates 3 | AFN,Afghanistan 4 | ALL,Albania 5 | AMD,Armenia 6 | ANG,Curaçao 7 | AOA,Angola 8 | ARS,Argentina 9 | AUD,Australia 10 | AWG,Aruba 11 | AZN,Azerbaijan 12 | BAM,Bosnia and Herzegovina 13 | BBD,Barbados 14 | BDT,Bangladesh 15 | BGN,Bulgaria 16 | BHD,Bahrain 17 | BIF,Burundi 18 | BMD,Bermuda 19 | BND,Brunei 20 | BOB,Bolivia 21 | BOV,Bolivia 22 | BRL,Brazil 23 | BSD,Bahamas 24 | BTN,Bhutan 25 | BWP,Botswana 26 | BYN,Belarus 
27 | BYR,Belarus 28 | BZD,Belize 29 | CAD,Canada 30 | CDF,Democratic Republic of the Congo 31 | CHE,Switzerland 32 | CHF,Switzerland 33 | CHW,Switzerland 34 | CLF,Chile 35 | CLP,Chile 36 | CNY,China 37 | COP,Colombia 38 | COU,Colombia 39 | CRC,Costa Rica 40 | CUC,Cuba 41 | CUP,Cuba 42 | CVE,Cape Verde 43 | CZK,Czech Republic 44 | DJF,Djibouti 45 | DKK,Denmark 46 | DOP,Dominican Republic 47 | DZD,Algeria 48 | EGP,Egypt 49 | ERN,Eritrea 50 | ETB,Ethiopia 51 | EUR,European Union 52 | FJD,Fiji 53 | FKP,Falkland Islands 54 | GBP,United Kingdom 55 | GEL,Georgia 56 | GHS,Ghana 57 | GIP,Gibraltar 58 | GMD,Gambia 59 | GNF,Guinea 60 | GTQ,Guatemala 61 | GYD,Guyana 62 | HKD,Hong Kong 63 | HNL,Honduras 64 | HRK,Croatia 65 | HTG,Haiti 66 | HUF,Hungary 67 | IDR,Indonesia 68 | ILS,Israel 69 | INR,India 70 | IQD,Iraq 71 | IRR,Iran 72 | ISK,Iceland 73 | JMD,Jamaica 74 | JOD,Jordan 75 | JPY,Japan 76 | KES,Kenya 77 | KGS,Kyrgyzstan 78 | KHR,Cambodia 79 | KMF,Comoros 80 | KPW,North Korea 81 | KRW,South Korea 82 | KWD,Kuwait 83 | KYD,Cayman Islands 84 | KZT,Kazakhstan 85 | LAK,Laos 86 | LBP,Lebanon 87 | LKR,Sri Lanka 88 | LRD,Liberia 89 | LSL,Lesotho 90 | LYD,Libya 91 | MAD,Morocco 92 | MDL,Moldova 93 | MGA,Madagascar 94 | MKD,Macedonia 95 | MMK,Myanmar 96 | MNT,Mongolia 97 | MOP,Macao 98 | MRO,Mauritania 99 | MUR,Mauritius 100 | MVR,Maldives 101 | MWK,Malawi 102 | MXN,Mexico 103 | MXV,Mexico 104 | MYR,Malaysia 105 | MZN,Mozambique 106 | NAD,Namibia 107 | NGN,Nigeria 108 | NIO,Nicaragua 109 | NOK,Norway 110 | NPR,Nepal 111 | NZD,New Zealand 112 | OMR,Oman 113 | PAB,Panama 114 | PEN,Peru 115 | PGK,Papua New Guinea 116 | PHP,Philippines 117 | PKR,Pakistan 118 | PLN,Poland 119 | PYG,Paraguay 120 | QAR,Qatar 121 | RON,Romania 122 | RSD,Serbia 123 | RUB,Russia 124 | RWF,Rwanda 125 | SAR,Saudi Arabia 126 | SBD,Solomon Islands 127 | SCR,Seychelles 128 | SDG,Sudan 129 | SEK,Sweden 130 | SGD,Singapore 131 | SHP,Saint Helena 132 | SLL,Sierra Leone 133 | SOS,Somalia 134 | SRD,Suriname 135 | 
SSP,South Sudan 136 | STD,São Tomé and Príncipe 137 | SVC,El Salvador 138 | SYP,Syria 139 | SZL,Swaziland 140 | THB,Thailand 141 | TJS,Tajikistan 142 | TMT,Turkmenistan 143 | TND,Tunisia 144 | TOP,Tonga 145 | TRY,Turkey 146 | TTD,Trinidad and Tobago 147 | TWD,Taiwan 148 | TZS,Tanzania 149 | UAH,Ukraine 150 | UGX,Uganda 151 | USD,United States 152 | USN,United States 153 | UYI,Uruguay 154 | UYU,Uruguay 155 | UZS,Uzbekistan 156 | VEF,Venezuela 157 | VND,Vietnam 158 | VUV,Vanuatu 159 | WST,Samoa 160 | XAF,Cameroon 161 | XCD,Anguilla 162 | XOF,Benin 163 | XPF,French Polynesia 164 | YER,Yemen 165 | ZAR,South Africa 166 | ZMW,Zambia 167 | ZWL,Zimbabwe 168 | -------------------------------------------------------------------------------- /riko/data/fourtitude.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Fourtitude.com 5 | http://www.fourtitude.com 6 | The site for the Audi Enthusiast 7 | en-us 8 | (C) 2005 Vortex Media Group Inc. 9 | info@fourtitude.com 10 | info@fourtitude.com 11 | 5 12 | 13 | Fourtitude.com 14 | 144 15 | 31 16 | http://www.fourtitde.com 17 | http://www.fourtitude.com/logorss.gif 18 | 19 | 20 | Audi Sport Travels to British DTM Round with Fondest Memories 21 | http://www.fourtitude.com/news/publish/Motorsport_News/article_7760.shtml 22 | May 11, 2012 10:01:00 EST 23 | 24 | 25 | 26 | Motorsport News 27 | http://www.fourtitude.com/news/publish/Motorsport_News/article_7760.shtml 28 | 29 | 30 | Audi Hungaria Celebrates Roofing Ceremony for New A3 Plant 31 | http://www.fourtitude.com/news/publish/Audi_News/article_7759.shtml 32 | May 11, 2012 09:53:00 EST 33 | 34 | 35 | 36 | Audi News 37 | http://www.fourtitude.com/news/publish/Audi_News/article_7759.shtml 38 | 39 | 40 | Preview: Audi at the Worthersee Tour 41 | http://www.fourtitude.com/news/publish/Audi_News/article_7758.shtml 42 | May 11, 2012 09:33:00 EST 43 | 44 | 45 | 46 | Audi News 47 | 
http://www.fourtitude.com/news/publish/Audi_News/article_7758.shtml 48 | 49 | 50 | Audi e-bike Worthersee: High-End Sports Machine 51 | http://www.fourtitude.com/news/publish/Audi_News/article_7757.shtml 52 | May 11, 2012 09:05:00 EST 53 | 54 | 55 | 56 | Audi News 57 | http://www.fourtitude.com/news/publish/Audi_News/article_7757.shtml 58 | 59 | 60 | Pattern of Growth Continues for Audi Group: Record Highs in Deliveries and Earnings 61 | http://www.fourtitude.com/news/publish/Audi_News/article_7756.shtml 62 | May 11, 2012 08:26:00 EST 63 | 64 | 65 | 66 | Audi News 67 | http://www.fourtitude.com/news/publish/Audi_News/article_7756.shtml 68 | 69 | 70 | The New Audi Environmental Magazine: Step by Step to a CO2-Neutral Site 71 | http://www.fourtitude.com/news/publish/Audi_News/article_7755.shtml 72 | May 11, 2012 08:19:00 EST 73 | 74 | 75 | 76 | Audi News 77 | http://www.fourtitude.com/news/publish/Audi_News/article_7755.shtml 78 | 79 | 80 | Audi ultra: Toward Le Mans Victory with Lightweight Design 81 | http://www.fourtitude.com/news/publish/Motorsport_News/article_7754.shtml 82 | May 11, 2012 08:07:00 EST 83 | 84 | 85 | 86 | Motorsport News 87 | http://www.fourtitude.com/news/publish/Motorsport_News/article_7754.shtml 88 | 89 | 90 | High-Voltage Battery Technology at Audi: Core Competence in Ingolstadt 91 | http://www.fourtitude.com/news/publish/Audi_News/article_7753.shtml 92 | May 11, 2012 07:57:00 EST 93 | 94 | 95 | 96 | Audi News 97 | http://www.fourtitude.com/news/publish/Audi_News/article_7753.shtml 98 | 99 | 100 | Audi AG: North American Growth Region with Significantly Increased Sales 101 | http://www.fourtitude.com/news/publish/Audi_News/article_7752.shtml 102 | May 11, 2012 07:47:00 EST 103 | 104 | 105 | 106 | Audi News 107 | http://www.fourtitude.com/news/publish/Audi_News/article_7752.shtml 108 | 109 | 110 | Audi Continues Progress on High-Voltage Battery Project House at Ingolstadt/Gaimersheim Site 111 | 
http://www.fourtitude.com/news/publish/Audi_News/article_7751.shtml 112 | May 11, 2012 07:37:00 EST 113 | 114 | 115 | 116 | Audi News 117 | http://www.fourtitude.com/news/publish/Audi_News/article_7751.shtml 118 | 119 | 120 | 121 | -------------------------------------------------------------------------------- /riko/data/lorem.txt: -------------------------------------------------------------------------------- 1 | What is Lorem Ipsum? 2 | Lorem Ipsum is simply dummy text of the printing and typesetting industry. 3 | Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, 4 | when an unknown printer took a galley of type and scrambled it to make a type 5 | specimen book. It has survived not only five centuries, but also the leap 6 | into electronic typesetting, remaining essentially unchanged. It was 7 | popularised in the 1960s with the release of Letraset sheets containing Lorem 8 | Ipsum passages, and more recently with desktop publishing software like Aldus 9 | PageMaker including versions of Lorem Ipsum. 10 | 11 | Why do we use it? 12 | It is a long established fact that a reader will be distracted by the readable 13 | content of a page when looking at its layout. The point of using Lorem Ipsum 14 | is that it has a more-or-less normal distribution of letters, as opposed to 15 | using 'Content here, content here', making it look like readable English. Many 16 | desktop publishing packages and web page editors now use Lorem Ipsum as their 17 | default model text, and a search for 'lorem ipsum' will uncover many web sites 18 | still in their infancy. Various versions have evolved over the years, sometimes 19 | by accident, sometimes on purpose (injected humour and the like). 20 | 21 | 22 | Where does it come from? 23 | Contrary to popular belief, Lorem Ipsum is not simply random text. It has roots 24 | in a piece of classical Latin literature from 45 BC, making it over 2000 years 25 | old. 
Richard McClintock, a Latin professor at Hampden-Sydney College in Virginia, 26 | looked up one of the more obscure Latin words, consectetur, from a Lorem Ipsum 27 | passage, and going through the cites of the word in classical literature, 28 | discovered the undoubtable source. Lorem Ipsum comes from sections 1.10.32 and 29 | 1.10.33 of "de Finibus Bonorum et Malorum" (The Extremes of Good and Evil) by 30 | Cicero, written in 45 BC. This book is a treatise on the theory of ethics, very 31 | popular during the Renaissance. The first line of Lorem Ipsum, "Lorem ipsum dolor 32 | sit amet..", comes from a line in section 1.10.32. -------------------------------------------------------------------------------- /riko/data/places.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 1181251680 5 | 040000008200E000 6 | 1181572063 7 | 8 | West Virginia 9 | Wisconsin 10 | Wyoming 11 | Puerto Rico 12 | U.S. Virgin Islands 13 | 14 | 1800 15 | Bring pizza home 16 | 17 | 18 | 1234360800 19 | 604f4792-eb89-478b-a14f-dd34d3cc6c21-1234360800 20 | 1181572063 21 | 22 | Arusha 23 | Nairobi 24 | Joburg 25 | Moshi 26 | Kampala 27 | 28 | 1800 29 | Check MS Office website for updates 30 | 31 | 32 | -------------------------------------------------------------------------------- /riko/data/quote.json: -------------------------------------------------------------------------------- 1 | { 2 | "disclaimer": "Usage subject to terms: https://openexchangerates.org/terms", 3 | "license": "https://openexchangerates.org/license", 4 | "timestamp": 1534633200, 5 | "base": "USD", 6 | "rates": { 7 | "AED": 3.673181, 8 | "AFN": 72.755844, 9 | "ALL": 110, 10 | "AMD": 482.128618, 11 | "ANG": 1.844499, 12 | "AOA": 268.6005, 13 | "ARS": 29.75, 14 | "AUD": 1.3668, 15 | "AWG": 1.793003, 16 | "AZN": 1.7025, 17 | "BAM": 1.717049, 18 | "BBD": 2, 19 | "BDT": 84.493, 20 | "BGN": 1.70985, 21 | "BHD": 0.37717, 22 | "BIF": 1785, 23 | "BMD": 1, 24 | "BND": 1.510506, 25 
| "BOB": 6.909307, 26 | "BRL": 3.910602, 27 | "BSD": 1, 28 | "BTC": 0.000156174096, 29 | "BTN": 70.07086, 30 | "BWP": 10.877472, 31 | "BYN": 2.049958, 32 | "BZD": 2.009488, 33 | "CAD": 1.305995, 34 | "CDF": 1615, 35 | "CHF": 0.99561, 36 | "CLF": 0.02338, 37 | "CLP": 667.7, 38 | "CNH": 6.834585, 39 | "CNY": 6.87455, 40 | "COP": 3005.577576, 41 | "CRC": 567.209185, 42 | "CUC": 1, 43 | "CUP": 25.5, 44 | "CVE": 97.1815, 45 | "CZK": 22.4652, 46 | "DJF": 178, 47 | "DKK": 6.5196, 48 | "DOP": 49.755, 49 | "DZD": 118.820391, 50 | "EGP": 17.879, 51 | "ERN": 14.994033, 52 | "ETB": 27.695, 53 | "EUR": 0.873325, 54 | "FJD": 2.125348, 55 | "FKP": 0.78419, 56 | "GBP": 0.78419, 57 | "GEL": 2.477417, 58 | "GGP": 0.78419, 59 | "GHS": 4.875, 60 | "GIP": 0.78419, 61 | "GMD": 48.175, 62 | "GNF": 8935, 63 | "GTQ": 7.48659, 64 | "GYD": 208.499583, 65 | "HKD": 7.850834, 66 | "HNL": 24.030079, 67 | "HRK": 6.484862, 68 | "HTG": 67.361924, 69 | "HUF": 282.19, 70 | "IDR": 14336.481583, 71 | "ILS": 3.660335, 72 | "IMP": 0.78419, 73 | "INR": 69.795, 74 | "IQD": 1190, 75 | "IRR": 43156.422013, 76 | "ISK": 107.349979, 77 | "JEP": 0.78419, 78 | "JMD": 134.95, 79 | "JOD": 0.710507, 80 | "JPY": 110.50517651, 81 | "KES": 100.81, 82 | "KGS": 68.137481, 83 | "KHR": 4071, 84 | "KMF": 431.70233, 85 | "KPW": 900, 86 | "KRW": 1119.5, 87 | "KWD": 0.303548, 88 | "KYD": 0.833077, 89 | "KZT": 359.95, 90 | "LAK": 8520, 91 | "LBP": 1514.809961, 92 | "LKR": 160.430887, 93 | "LRD": 154.249852, 94 | "LSL": 13.421367, 95 | "LYD": 1.39, 96 | "MAD": 9.5275, 97 | "MDL": 16.650107, 98 | "MGA": 3315, 99 | "MKD": 53.934737, 100 | "MMK": 1489.657202, 101 | "MNT": 2442.166667, 102 | "MOP": 8.08326, 103 | "MRO": 357.5, 104 | "MRU": 35.97, 105 | "MUR": 34.649, 106 | "MVR": 15.450044, 107 | "MWK": 727.061323, 108 | "MXN": 18.8935, 109 | "MYR": 4.102481, 110 | "MZN": 59.041109, 111 | "NAD": 14.534635, 112 | "NGN": 361.01, 113 | "NIO": 31.86, 114 | "NOK": 8.450943, 115 | "NPR": 112.109955, 116 | "NZD": 1.506932, 117 | "OMR": 
0.385058, 118 | "PAB": 1, 119 | "PEN": 3.323069, 120 | "PGK": 3.3172, 121 | "PHP": 53.294962, 122 | "PKR": 123.683333, 123 | "PLN": 3.753402, 124 | "PYG": 5756.00401, 125 | "QAR": 3.641, 126 | "RON": 4.068601, 127 | "RSD": 103.227962, 128 | "RUB": 67.0075, 129 | "RWF": 865.75, 130 | "SAR": 3.75055, 131 | "SBD": 7.88911, 132 | "SCR": 13.588915, 133 | "SDG": 18.02, 134 | "SEK": 9.1571, 135 | "SGD": 1.3713, 136 | "SHP": 0.78419, 137 | "SLL": 6542.71, 138 | "SOS": 578.5, 139 | "SRD": 7.458, 140 | "SSP": 130.2634, 141 | "STD": 21050.59961, 142 | "STN": 21.55, 143 | "SVC": 8.748373, 144 | "SYP": 514.85499, 145 | "SZL": 14.534541, 146 | "THB": 33.158, 147 | "TJS": 9.413073, 148 | "TMT": 3.499986, 149 | "TND": 2.755293, 150 | "TOP": 2.310538, 151 | "TRY": 6.013976, 152 | "TTD": 6.73925, 153 | "TWD": 30.725347, 154 | "TZS": 2282.465888, 155 | "UAH": 27.689519, 156 | "UGX": 3747.579147, 157 | "USD": 1, 158 | "UYU": 31.568695, 159 | "UZS": 7800, 160 | "VEF": 141572.666667, 161 | "VND": 23106.485172, 162 | "VUV": 108.499605, 163 | "WST": 2.588533, 164 | "XAF": 572.863647, 165 | "XAG": 0.06756834, 166 | "XAU": 0.00078, 167 | "XCD": 2.70255, 168 | "XDR": 0.71658, 169 | "XOF": 572.863647, 170 | "XPD": 0.00109045, 171 | "XPF": 104.215394, 172 | "XPT": 0.00126562, 173 | "YER": 250.350747, 174 | "ZAR": 14.56358, 175 | "ZMW": 10.247, 176 | "ZWL": 322.355011 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /riko/data/schools.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Turkana 6 | 94.3 7 | 481442 8 | 9 | 10 | Marsabit 11 | 91.7 12 | 118786 13 | 14 | 15 | Mandera 16 | 87.8 17 | 225812 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /riko/data/users.jyu.fi.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | HTML tidy service 6 | 7 | 8 | 9 | 10 | 11 |

12 | W3C 13 |

14 |

Tidy your HTML

15 |
16 |

Address of document to tidy: 17 | 18 |

19 |

20 | 22 |

23 |

24 | (may lead to loss of parts of the originating document if too ill-formed)

26 |

27 | 28 |

29 |
30 |
31 |

Stuff used to build this service

32 | 37 |

See also the underlying Python script.

38 |
39 | script $Revision: 1.15 $ of $Date: 2010/11/22 16:44:06 $ 40 |
by Dan Connolly 41 |
Further developed and maintained by Dominique Hazael-Massieux 42 |
43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /riko/data/yql.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 1181251680 6 | 040000008200E000 7 | 1181572063 8 | Wisconsin 9 | 1800 10 | Bring pizza home 11 | 12 | 13 | 1234360800 14 | 604f4792-eb89-478b-a14f-dd34d3cc6c21-1234360800 15 | 1181572063 16 | Nairobi 17 | 1800 18 | Check MS Office website for updates 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /riko/dates.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # vim: sw=4:ts=4:expandtab 3 | """ 4 | riko.dates 5 | ~~~~~~~~~~ 6 | Provides date and time helpers 7 | """ 8 | from datetime import timedelta, datetime as dt 9 | from time import strptime 10 | 11 | import pytz 12 | 13 | from pytz import utc 14 | from dateutil.tz import gettz, tzoffset 15 | 16 | DATE_FORMAT = "%m/%d/%Y" 17 | DATETIME_FORMAT = "{0} %H:%M:%S".format(DATE_FORMAT) 18 | TIMEOUT = 60 * 60 * 1 19 | HALF_DAY = 60 * 60 * 12 20 | TODAY = dt.utcnow() 21 | 22 | 23 | def gen_tzinfos(): 24 | for zone in pytz.common_timezones: 25 | try: 26 | tzdate = pytz.timezone(zone).localize(dt.utcnow(), is_dst=None) 27 | except pytz.NonExistentTimeError: 28 | pass 29 | else: 30 | tzinfo = gettz(zone) 31 | 32 | if tzinfo: 33 | yield tzdate.tzname(), tzinfo 34 | 35 | 36 | def get_date(unit, count, op): 37 | new_month = op(TODAY.month, count) % 12 or 12 38 | 39 | DATES = { 40 | "seconds": op(TODAY, timedelta(seconds=count)), 41 | "minutes": op(TODAY, timedelta(minutes=count)), 42 | "hours": op(TODAY, timedelta(hours=count)), 43 | "days": op(TODAY, timedelta(days=count)), 44 | "weeks": op(TODAY, timedelta(weeks=count)), 45 | "months": TODAY.replace(month=new_month), 46 | "years": TODAY.replace(year=op(TODAY.year, count)), 47 | } 48 | 49 | return DATES[unit] 50 | 51 | 52 | def 
normalize_date(date): 53 | try: 54 | # See if date is a `time.struct_time` 55 | # if so, convert it and account for leapseconds 56 | tt, date = date, dt(*date[:5] + (min(date[5], 59),)) 57 | except TypeError: 58 | pass 59 | else: 60 | is_dst = None if tt[8] == -1 else tt[8] 61 | 62 | try: 63 | tm_zone = tt.tm_zone 64 | except AttributeError: 65 | tm_zone = None 66 | tm_gmtoff = None 67 | else: 68 | tm_gmtoff = tt.tm_gmtoff 69 | 70 | if tm_zone: 71 | date = pytz.timezone(tm_zone).localize(date, is_dst=is_dst) 72 | elif tm_gmtoff: 73 | offset = tzoffset(None, tm_gmtoff) 74 | date.replace(tzinfo=offset) 75 | 76 | # Set timezone to UTC 77 | try: 78 | tzdate = date.astimezone(utc) if date.tzinfo else utc.localize(date) 79 | except AttributeError: 80 | tzdate = date 81 | 82 | return tzdate 83 | 84 | 85 | def get_tt(date): 86 | formatted = "".join(date.isoformat().rsplit(":", 1)) 87 | sformat = "%Y-%m-%d" if len(formatted) == 10 else "%Y-%m-%dT%H:%M:%S%z" 88 | 89 | try: 90 | tt = strptime(formatted, sformat) 91 | except ValueError: 92 | tt = strptime(formatted[:19], "%Y-%m-%dT%H:%M:%S") 93 | 94 | return tt 95 | -------------------------------------------------------------------------------- /riko/dotdict.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # vim: sw=4:ts=4:expandtab 3 | """ 4 | riko.dotdict 5 | ~~~~~~~~~~~~ 6 | Provides a class for creating dicts with dot notation access 7 | """ 8 | import pygogo as gogo 9 | 10 | from functools import reduce 11 | 12 | logger = gogo.Gogo(__name__, monolog=True).logger 13 | 14 | 15 | class DotDict(dict): 16 | """A dictionary whose keys can be accessed using dot notation 17 | >>> r = DotDict({'a': {'content': 'value'}}) 18 | >>> r.get('a.content') == 'value' 19 | True 20 | >>> r['a.content'] == 'value' 21 | True 22 | """ 23 | 24 | def __init__(self, data=None, **kwargs): 25 | self.update(data) 26 | 27 | def _parse_key(self, key=None): 28 | try: 29 | keys = 
key.rstrip(".").split(".") if key else [] 30 | except AttributeError: 31 | keys = [key["subkey"]] if key else [] 32 | 33 | return keys 34 | 35 | def _parse_value(self, value, key, default=None): 36 | try: 37 | parsed = value[key] 38 | except KeyError: 39 | try: 40 | parsed = value["value"] 41 | except KeyError: 42 | parsed = default 43 | except (TypeError, IndexError): 44 | if hasattr(value, "append"): 45 | parsed = [v[key] for v in value] 46 | else: 47 | parsed = value 48 | 49 | return default if parsed is None else parsed 50 | 51 | def __getitem__(self, key): 52 | keys = self._parse_key(key) 53 | value = super(DotDict, self).__getitem__(keys[0]) 54 | 55 | if len(keys) > 1: 56 | return value[".".join(keys[1:])] 57 | elif hasattr(value, "keys") and "value" in value: 58 | value = value["value"] 59 | 60 | return DotDict(value) if hasattr(value, "keys") else value 61 | 62 | def get(self, key=None, default=None, **kwargs): 63 | keys = self._parse_key(key) 64 | value = DotDict(self.copy()) 65 | 66 | for key in keys: 67 | try: 68 | key = int(key) 69 | except ValueError: 70 | pass 71 | 72 | value = self._parse_value(value, key, default) 73 | 74 | if hasattr(value, "keys") and "terminal" in value: 75 | # value fed in from another module 76 | stream = kwargs[value["terminal"]] 77 | value = next(stream)[value.get("path", "content")] 78 | elif hasattr(value, "keys") and "value" in value: 79 | value = value["value"] 80 | 81 | return DotDict(value) if hasattr(value, "keys") else value 82 | 83 | def delete(self, key): 84 | keys = self._parse_key(key) 85 | last = keys[-1] 86 | 87 | try: 88 | del reduce(lambda i, k: DotDict(i).get(k), [self] + keys[:-1])[last] 89 | except KeyError: 90 | pass 91 | 92 | def set(self, key, value): 93 | keys = self._parse_key(key) 94 | first = keys[:-1] 95 | last = keys[-1] 96 | item = self.copy() 97 | reduce(lambda i, k: i.setdefault(k, {}), first, item)[last] = value 98 | super(DotDict, self).update(item) 99 | 100 | def update(self, data=None): 101 
| if not data: 102 | return 103 | 104 | _dict = dict(data) 105 | dot_keys = [d for d in _dict if "." in d] 106 | 107 | if dot_keys: 108 | # skip key if a subkey redefines it 109 | # i.e., 'author.name' has precedence over 'author' 110 | keys = [".".join(self._parse_key(dk)[:-1]) for dk in dot_keys] 111 | items = ((k, v) for k, v in _dict.items() if k not in keys) 112 | else: 113 | items = _dict.items() 114 | 115 | [self.set(key, value) for key, value in items] 116 | -------------------------------------------------------------------------------- /riko/modules/count.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # vim: sw=4:ts=4:expandtab 3 | """ 4 | riko.modules.count 5 | ~~~~~~~~~~~~~~~~~~ 6 | Provides functions for counting the number of items in a stream. 7 | 8 | Examples: 9 | basic usage:: 10 | 11 | >>> from riko.modules.count import pipe 12 | >>> 13 | >>> next(pipe({'x': x} for x in range(5))) == {'count': 5} 14 | True 15 | 16 | Attributes: 17 | OPTS (dict): The default pipe options 18 | DEFAULTS (dict): The default parser options 19 | """ 20 | import itertools as it 21 | import pygogo as gogo 22 | 23 | from operator import itemgetter 24 | 25 | from . import operator 26 | 27 | OPTS = {"extract": "count_key"} 28 | DEFAULTS = {"count_key": None} 29 | logger = gogo.Gogo(__name__, monolog=True).logger 30 | 31 | 32 | def parser(stream, key, tuples, **kwargs): 33 | """Parses the pipe content 34 | 35 | Args: 36 | stream (Iter[dict]): The source. Note: this shares the `tuples` 37 | iterator, so consuming it will consume `tuples` as well. 38 | 39 | key (str): the field to group by. 40 | 41 | tuples (Iter[(dict, obj)]): Iterable of tuples of (item, objconf) 42 | `item` is an element in the source stream and `objconf` is the item 43 | configuration (an Objectify instance). Note: this shares the 44 | `stream` iterator, so consuming it will consume `stream` as well. 
45 | 46 | kwargs (dict): Keyword arguments. 47 | 48 | Kwargs: 49 | conf (dict): The pipe configuration. 50 | 51 | Returns: 52 | mixed: The output either a dict or iterable of dicts 53 | 54 | Examples: 55 | >>> from itertools import repeat 56 | >>> 57 | >>> stream = ({'x': x} for x in range(5)) 58 | >>> tuples = zip(stream, repeat(None)) 59 | >>> parser(stream, None, tuples, assign='content') == {'content': 5} 60 | True 61 | >>> conf = {'count_key': 'word'} 62 | >>> kwargs = {'conf': conf} 63 | >>> stream = [{'word': 'two'}, {'word': 'one'}, {'word': 'two'}] 64 | >>> tuples = zip(stream, repeat(conf['count_key'])) 65 | >>> counted = parser(stream, conf['count_key'], tuples, **kwargs) 66 | >>> next(counted) == {'one': 1} 67 | True 68 | >>> next(counted) == {'two': 2} 69 | True 70 | """ 71 | if key: 72 | keyfunc = itemgetter(key) 73 | sorted_stream = sorted(stream, key=keyfunc) 74 | grouped = it.groupby(sorted_stream, keyfunc) 75 | counted = ({key: len(list(group))} for key, group in grouped) 76 | else: 77 | counted = {kwargs["assign"]: len(list(stream))} 78 | 79 | return counted 80 | 81 | 82 | @operator(DEFAULTS, isasync=True, **OPTS) 83 | def async_pipe(*args, **kwargs): 84 | """An operator that asynchronously and eagerly counts the number of items 85 | in a stream. Note that this pipe is not lazy. 86 | 87 | Args: 88 | items (Iter[dict]): The source. 89 | kwargs (dict): The keyword arguments passed to the wrapper 90 | 91 | Kwargs: 92 | conf (dict): The pipe configuration. May contain the key 'count_key'. 93 | 94 | count_key (str): Item attribute to count by. This will group items 95 | in the stream by the given key and report a count for each 96 | group (default: None). 97 | 98 | assign (str): Attribute to assign parsed content. If `count_key` is set, 99 | this is ignored and the group keys are used instead. 
(default: 100 | content) 101 | 102 | Returns: 103 | Deferred: twisted.internet.defer.Deferred iterator of the number of 104 | counted items 105 | 106 | Examples: 107 | >>> from riko.bado import react 108 | >>> from riko.bado.mock import FakeReactor 109 | >>> 110 | >>> def run(reactor): 111 | ... callback = lambda x: print(next(x) == {'count': 5}) 112 | ... items = ({'x': x} for x in range(5)) 113 | ... d = async_pipe(items) 114 | ... return d.addCallbacks(callback, logger.error) 115 | >>> 116 | >>> try: 117 | ... react(run, _reactor=FakeReactor()) 118 | ... except SystemExit: 119 | ... pass 120 | ... 121 | True 122 | """ 123 | return parser(*args, **kwargs) 124 | 125 | 126 | @operator(DEFAULTS, **OPTS) 127 | def pipe(*args, **kwargs): 128 | """An operator that eagerly counts the number of items in a stream. 129 | Note that this pipe is not lazy. 130 | 131 | Args: 132 | items (Iter[dict]): The source. 133 | kwargs (dict): The keyword arguments passed to the wrapper 134 | 135 | Kwargs: 136 | conf (dict): The pipe configuration. May contain the key 'count_key'. 137 | 138 | count_key (str): Item attribute to count by. This will group items 139 | in the stream by the given key and report a count for each 140 | group (default: None). 141 | 142 | assign (str): Attribute to assign parsed content. If `count_key` is set, 143 | this is ignored and the group keys are used instead. 
(default: 144 | content) 145 | 146 | Yields: 147 | dict: the number of counted items 148 | 149 | Examples: 150 | >>> stream = ({'x': x} for x in range(5)) 151 | >>> next(pipe(stream)) == {'count': 5} 152 | True 153 | >>> stream = [{'word': 'two'}, {'word': 'one'}, {'word': 'two'}] 154 | >>> counted = pipe(stream, conf={'count_key': 'word'}) 155 | >>> next(counted) == {'one': 1} 156 | True 157 | >>> next(counted) == {'two': 2} 158 | True 159 | """ 160 | return parser(*args, **kwargs) 161 | -------------------------------------------------------------------------------- /riko/modules/currencyformat.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # vim: sw=4:ts=4:expandtab 3 | """ 4 | riko.modules.currencyformat 5 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 6 | Provides functions for formatting numbers to currency strings. 7 | 8 | Examples: 9 | basic usage:: 10 | 11 | >>> from riko.modules.currencyformat import pipe 12 | >>> 13 | >>> next(pipe({'content': '100'}))['currencyformat'] == '$100.00' 14 | True 15 | 16 | Attributes: 17 | OPTS (dict): The default pipe options 18 | DEFAULTS (dict): The default parser options 19 | """ 20 | from decimal import Decimal 21 | from babel.numbers import format_currency 22 | 23 | from . 
def parser(amount, objconf, skip=False, **kwargs):
    """Parse the pipe content into a formatted currency string.

    Args:
        amount (Decimal): The amount to format
        objconf (obj): The pipe configuration (an Objectify instance); its
            `currency` attribute is the ISO currency code
        skip (bool): Don't parse the content

    Kwargs:
        stream (dict): The original item (returned unchanged when `skip`
            is set)

    Returns:
        str: The formatted amount (or Decimal('NaN') when the amount is
            missing or cannot be formatted)

    Examples:
        >>> from decimal import Decimal
        >>> from meza.fntools import Objectify
        >>>
        >>> objconf = Objectify({'currency': 'USD'})
        >>> parser(Decimal('10.33'), objconf) == '$10.33'
        True
    """
    if skip:
        return kwargs["stream"]

    if amount is None:
        return NaN

    try:
        return format_currency(amount, objconf.currency)
    except ValueError:
        # Unformattable amounts degrade to NaN rather than raising.
        return NaN
@processor(DEFAULTS, **OPTS)
def pipe(*args, **kwargs):
    """A processor module that formats a number to a given currency string.

    Args:
        item (dict): The entry to process
        kwargs (dict): The keyword arguments passed to the wrapper

    Kwargs:
        conf (dict): The pipe configuration. May contain the key 'currency'.

            currency (str): The currency ISO abbreviation (default: USD).

        assign (str): Attribute to assign parsed content (default:
            currencyformat)

        field (str): Item attribute from which to obtain the string to be
            formatted (default: 'content')

    Returns:
        dict: an item with the formatted currency string

    Examples:
        >>> next(pipe({'content': '10.33'}))['currencyformat'] == '$10.33'
        True
        >>> conf = {'currency': 'GBP'}
        >>> result = next(pipe({'content': '100'}, conf=conf))
        >>> result['currencyformat'] == '£100.00'
        True
    """
    # Delegate to the module-level parser; the @processor wrapper handles
    # conf parsing and field extraction.
    return parser(*args, **kwargs)
def parser(date, objconf, skip=False, **kwargs):
    """Format a date-like item as a string.

    Args:
        date (dict): Must have key 'date' with a date-like object value
        objconf (obj): The pipe configuration (an Objectify instance); its
            `format` attribute is passed to time.strftime
        skip (bool): Don't parse the content

    Kwargs:
        stream (dict): The original item (returned unchanged when `skip`
            is set)

    Returns:
        str: The formatted date (or the original stream when `skip` is set)

    Examples:
        >>> from datetime import date
        >>> from meza.fntools import Objectify
        >>>
        >>> objconf = Objectify({'format': '%m/%d/%Y'})
        >>> parser({'date': date(2015, 5, 4)}, objconf)
        '05/04/2015'
    """
    # Only touch the item when actually parsing. The previous version called
    # date['date'].timetuple() unconditionally, so the skip path raised
    # KeyError/AttributeError even though the result was discarded.
    if skip:
        return kwargs["stream"]

    return strftime(objconf.format, date["date"].timetuple())
@processor(DEFAULTS, **OPTS)
def pipe(*args, **kwargs):
    """A processor module that formats a date.

    Args:
        item (dict): The entry to process
        kwargs (dict): The keyword arguments passed to the wrapper

    Kwargs:
        conf (dict): The pipe configuration. May contain the keys 'format' or
            'field'.

            format (str): Format string passed to time.strftime (default:
                '%m/%d/%Y %H:%M:%S', i.e., '02/12/2008 20:45:00')

        assign (str): Attribute to assign parsed content (default:
            dateformat)

        field (str): Item attribute from which to obtain the string to be
            formatted (default: 'date'). The wrapper casts the field value
            to a date (OPTS ftype is 'date'), so date strings such as
            '05/04/2015' are accepted as well as date objects.

    Returns:
        dict: an item with formatted date string

    Examples:
        >>> from datetime import date
        >>> item = {'date': date(2015, 5, 4)}
        >>> next(pipe(item))['dateformat']
        '05/04/2015 00:00:00'
        >>> next(pipe(item, conf={'format': '%Y'}))['dateformat']
        '2015'
        >>> next(pipe({'date': '05/04/2015'}))['dateformat']
        '05/04/2015 00:00:00'
    """
    # Delegate to the module-level parser; the @processor wrapper handles
    # conf parsing and the date cast.
    return parser(*args, **kwargs)
17 | 18 | Examples: 19 | basic usage:: 20 | 21 | >>> from riko import get_path 22 | >>> from riko.modules.feedautodiscovery import pipe 23 | >>> 24 | >>> entry = next(pipe(conf={'url': get_path('bbc.html')})) 25 | >>> sorted(entry) == ['href', 'hreflang', 'link', 'rel', 'tag'] 26 | True 27 | >>> entry['link'] == 'file://riko/data/greenhughes.xml' 28 | True 29 | 30 | Attributes: 31 | OPTS (dict): The default pipe options 32 | DEFAULTS (dict): The default parser options 33 | """ 34 | import pygogo as gogo 35 | 36 | from . import processor 37 | from riko import autorss 38 | from riko.utils import get_abspath 39 | from riko.bado import coroutine, return_value 40 | 41 | 42 | OPTS = {"ftype": "none"} 43 | logger = gogo.Gogo(__name__, monolog=True).logger 44 | 45 | 46 | @coroutine 47 | def async_parser(_, objconf, skip=False, **kwargs): 48 | """Asynchronously parses the pipe content 49 | 50 | Args: 51 | _ (None): Ignored 52 | objconf (obj): The pipe configuration (an Objectify instance) 53 | skip (bool): Don't parse the content 54 | kwargs (dict): Keyword arguments 55 | 56 | Kwargs: 57 | stream (dict): The original item 58 | 59 | Returns: 60 | Iter[dict]: Deferred stream 61 | 62 | Examples: 63 | >>> from riko import get_path 64 | >>> from riko.bado import react 65 | >>> from riko.bado.mock import FakeReactor 66 | >>> from meza.fntools import Objectify 67 | >>> 68 | >>> def run(reactor): 69 | ... callback = lambda x: print(next(x)['link']) 70 | ... objconf = Objectify({'url': get_path('bbc.html')}) 71 | ... d = async_parser(None, objconf, stream={}) 72 | ... return d.addCallbacks(callback, logger.error) 73 | >>> 74 | >>> try: 75 | ... react(run, _reactor=FakeReactor()) 76 | ... except SystemExit: 77 | ... pass 78 | ... 
def parser(_, objconf, skip=False, **kwargs):
    """Discover the RSS/Atom feeds advertised by a web page.

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance); its
            `url` attribute is the page to examine
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item (returned unchanged when `skip`
            is set)

    Returns:
        Iter[dict]: The stream of discovered feed links

    Examples:
        >>> from riko import get_path
        >>> from meza.fntools import Objectify
        >>>
        >>> objconf = Objectify({'url': get_path('bbc.html')})
        >>> result = parser(None, objconf, stream={})
        >>> next(result)['link'] == 'file://riko/data/greenhughes.xml'
        True
    """
    if skip:
        return kwargs["stream"]

    # Resolve relative/local paths before handing the URL to autorss.
    page_url = get_abspath(objconf.url)
    return autorss.get_rss(page_url)
@processor(**OPTS)
def pipe(*args, **kwargs):
    """A source that fetches and parses the first feed found on a site.

    If the page advertises more than one feed, one item is yielded per
    discovered feed, so the output is often piped into a Fetch Feed module.

    Args:
        item (dict): The entry to process (not used)
        kwargs (dict): The keyword arguments passed to the wrapper

    Kwargs:
        conf (dict): The pipe configuration. Must contain the key 'url'.

            url (str): The web site to fetch

    Yields:
        dict: item describing a discovered feed

    Examples:
        >>> from riko import get_path
        >>> conf = {'url': get_path('bbc.html')}
        >>> next(pipe(conf=conf))['link'] == 'file://riko/data/greenhughes.xml'
        True
    """
    # Delegate to the module-level parser; the @processor wrapper handles
    # conf parsing.
    return parser(*args, **kwargs)
19 | The latter returns a list of information about all the feeds discovered in a 20 | site, but (unlike this module) doesn't fetch the feed data itself. 21 | 22 | Examples: 23 | basic usage:: 24 | 25 | >>> from riko import get_path 26 | >>> from riko.modules.fetchsitefeed import pipe 27 | >>> 28 | >>> title = 'Using NFC tags in the car' 29 | >>> next(pipe(conf={'url': get_path('bbc.html')}))['title'] == title 30 | True 31 | 32 | Attributes: 33 | OPTS (dict): The default pipe options 34 | DEFAULTS (dict): The default parser options 35 | """ 36 | import pygogo as gogo 37 | 38 | from . import processor 39 | 40 | from riko import autorss 41 | from riko.utils import gen_entries, get_abspath 42 | from riko.parsers import parse_rss 43 | from riko.bado import coroutine, return_value, io 44 | 45 | OPTS = {"ftype": "none"} 46 | logger = gogo.Gogo(__name__, monolog=True).logger 47 | 48 | 49 | @coroutine 50 | def async_parser(_, objconf, skip=False, **kwargs): 51 | """Asynchronously parses the pipe content 52 | 53 | Args: 54 | _ (None): Ignored 55 | objconf (obj): The pipe configuration (an Objectify instance) 56 | skip (bool): Don't parse the content 57 | kwargs (dict): Keyword arguments 58 | 59 | Kwargs: 60 | stream (dict): The original item 61 | 62 | Returns: 63 | Iter[dict]: The stream of items 64 | 65 | Examples: 66 | >>> from riko import get_path 67 | >>> from riko.bado import react 68 | >>> from riko.bado.mock import FakeReactor 69 | >>> from meza.fntools import Objectify 70 | >>> 71 | >>> def run(reactor): 72 | ... callback = lambda x: print(next(x)['title']) 73 | ... objconf = Objectify({'url': get_path('bbc.html')}) 74 | ... d = async_parser(None, objconf, stream={}) 75 | ... return d.addCallbacks(callback, logger.error) 76 | >>> 77 | >>> try: 78 | ... react(run, _reactor=FakeReactor()) 79 | ... except SystemExit: 80 | ... pass 81 | ... 
def parser(_, objconf, skip=False, **kwargs):
    """Fetch and parse the first feed auto-discovered on a web page.

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance); its
            `url` attribute is the page to examine. NOTE: on success this
            function rebinds `objconf.url` to the discovered feed URL.
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item (returned unchanged when `skip`
            is set)

    Returns:
        Iter[dict]: The stream of feed entries

    Examples:
        >>> from riko import get_path
        >>> from meza.fntools import Objectify
        >>>
        >>> objconf = Objectify({'url': get_path('bbc.html')})
        >>> result = parser(None, objconf, stream={})
        >>> next(result)['title'] == 'Using NFC tags in the car'
        True
    """
    if skip:
        return kwargs["stream"]

    # Only the first auto-discovered feed is used; point the config at it
    # before fetching/parsing the feed itself.
    discovered = autorss.get_rss(get_abspath(objconf.url))
    objconf.url = get_abspath(next(discovered)["link"])
    return gen_entries(parse_rss(**objconf))
@processor(**OPTS)
def pipe(*args, **kwargs):
    """A source that fetches and parses the first feed found on a site.

    Args:
        item (dict): The entry to process (not used)
        kwargs (dict): The keyword arguments passed to the wrapper

    Kwargs:
        conf (dict): The pipe configuration. Must contain the key 'url'.

            url (str): The web site to fetch

    Yields:
        dict: item (an entry from the first discovered feed)

    Examples:
        >>> from riko import get_path
        >>> title = 'Using NFC tags in the car'
        >>> next(pipe(conf={'url': get_path('bbc.html')}))['title'] == title
        True
    """
    # Delegate to the module-level parser; the @processor wrapper handles
    # conf parsing.
    return parser(*args, **kwargs)
10 | 11 | Examples: 12 | basic usage:: 13 | 14 | >>> from riko import get_path 15 | >>> from riko.modules.fetchtext import pipe 16 | >>> 17 | >>> conf = {'url': get_path('lorem.txt')} 18 | >>> next(pipe(conf=conf))['content'] == 'What is Lorem Ipsum?' 19 | True 20 | 21 | Attributes: 22 | OPTS (dict): The default pipe options 23 | DEFAULTS (dict): The default parser options 24 | """ 25 | import pygogo as gogo 26 | 27 | from . import processor 28 | from riko import ENCODING 29 | from riko.utils import fetch, auto_close, get_abspath 30 | from riko.bado import coroutine, return_value, io 31 | 32 | OPTS = {"ftype": "none", "assign": "content"} 33 | DEFAULTS = {"encoding": ENCODING} 34 | logger = gogo.Gogo(__name__, monolog=True).logger 35 | 36 | 37 | @coroutine 38 | def async_parser(_, objconf, skip=False, **kwargs): 39 | """Asynchronously parses the pipe content 40 | 41 | Args: 42 | _ (None): Ignored 43 | objconf (obj): The pipe configuration (an Objectify instance) 44 | skip (bool): Don't parse the content 45 | kwargs (dict): Keyword arguments 46 | 47 | Kwargs: 48 | stream (dict): The original item 49 | 50 | Returns: 51 | Iter[dict]: The stream of items 52 | 53 | Examples: 54 | >>> from riko import get_path 55 | >>> from riko.bado import react 56 | >>> from riko.bado.mock import FakeReactor 57 | >>> from meza.fntools import Objectify 58 | >>> 59 | >>> def run(reactor): 60 | ... callback = lambda x: print(next(x)['content']) 61 | ... url = get_path('lorem.txt') 62 | ... objconf = Objectify({'url': url, 'encoding': ENCODING}) 63 | ... d = async_parser(None, objconf, assign='content') 64 | ... return d.addCallbacks(callback, logger.error) 65 | >>> 66 | >>> try: 67 | ... react(run, _reactor=FakeReactor()) 68 | ... except SystemExit: 69 | ... pass 70 | ... 71 | What is Lorem Ipsum? 
def parser(_, objconf, skip=False, **kwargs):
    """Fetch a text source and yield one item per stripped line.

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance); must
            provide the fetch arguments (e.g. `url`, `encoding`)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item (returned unchanged when `skip`
            is set)
        assign (str): Attribute under which each line is stored

    Returns:
        Iter[dict]: The stream of items (the file handle is closed when the
            stream is exhausted)

    Examples:
        >>> from riko import get_path
        >>> from meza.fntools import Objectify
        >>>
        >>> url = get_path('lorem.txt')
        >>> objconf = Objectify({'url': url, 'encoding': ENCODING})
        >>> result = parser(None, objconf, assign='content')
        >>> next(result)['content'] == 'What is Lorem Ipsum?'
        True
    """
    if skip:
        return kwargs["stream"]

    handle = fetch(decode=True, **objconf)
    assign = kwargs["assign"]
    lines = ({assign: raw.strip()} for raw in handle)
    # auto_close closes the handle once the generator is exhausted.
    return auto_close(lines, handle)
136 | 137 | assign (str): Attribute to assign parsed content (default: content) 138 | 139 | 140 | Returns: 141 | Deferred: twisted.internet.defer.Deferred stream of items 142 | 143 | Examples: 144 | >>> from riko import get_path 145 | >>> from riko.bado import react 146 | >>> from riko.bado.mock import FakeReactor 147 | >>> 148 | >>> def run(reactor): 149 | ... callback = lambda x: print(next(x)['content']) 150 | ... conf = {'url': get_path('lorem.txt')} 151 | ... d = async_pipe(conf=conf) 152 | ... return d.addCallbacks(callback, logger.error) 153 | >>> 154 | >>> try: 155 | ... react(run, _reactor=FakeReactor()) 156 | ... except SystemExit: 157 | ... pass 158 | ... 159 | What is Lorem Ipsum? 160 | """ 161 | return async_parser(*args, **kwargs) 162 | 163 | 164 | @processor(DEFAULTS, **OPTS) 165 | def pipe(*args, **kwargs): 166 | """A source that fetches and parses an XML or JSON file to 167 | return the entries. 168 | 169 | Args: 170 | item (dict): The entry to process 171 | kwargs (dict): The keyword arguments passed to the wrapper 172 | 173 | Kwargs: 174 | conf (dict): The pipe configuration. Must contain the key 'url'. May 175 | contain the key 'encoding'. 176 | 177 | url (str): The web site to fetch 178 | encoding (str): The file encoding (default: utf-8). 179 | 180 | assign (str): Attribute to assign parsed content (default: content) 181 | 182 | Returns: 183 | dict: an iterator of items 184 | 185 | Examples: 186 | >>> from riko import get_path 187 | >>> 188 | >>> conf = {'url': get_path('lorem.txt')} 189 | >>> next(pipe(conf=conf))['content'] == 'What is Lorem Ipsum?' 
def parser(address, objconf, skip=False, **kwargs):
    """Look up the geo location of the pipe content.

    Args:
        address (str): The value to look up (a street address, ip address,
            currency code, or lat/lon coordinates, depending on
            `objconf.type`)
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        assign (str): Attribute to assign parsed content (default: geolocate)
        stream (dict): The original item

    Returns:
        dict: The geo location data (or the original stream when `skip` is
            set). Note: an earlier doc claimed a (item, skip) tuple, but the
            function returns only the location mapping.

    Examples:
        >>> from riko import get_path
        >>> from meza.fntools import Objectify
        >>>
        >>> item = {'content': 'GBP'}
        >>> objconf = Objectify({'type': 'currency'})
        >>> kwargs = {'stream': item, 'assign': 'content'}
        >>> country = 'United Kingdom'
        >>> parser(item['content'], objconf, **kwargs)['country'] == country
        True
    """
    if skip:
        location = kwargs["stream"]
    else:
        # `cast` dispatches on loc_type ('coordinates', 'street_address',
        # 'ip_address', or 'currency') and returns the location mapping.
        location = cast(address, "location", loc_type=objconf.type)

    return location
117 | 118 | Args: 119 | item (dict): The entry to process 120 | kwargs (dict): The keyword arguments passed to the wrapper 121 | 122 | Kwargs: 123 | conf (dict): The pipe configuration. May contain the key 'type'. 124 | 125 | type (str): The type of geolocation to perform. Must be one of 126 | 'coordinates', 'street_address', 'ip_address', or 'currency' 127 | (default: 'street_address'). 128 | 129 | assign (str): Attribute to assign parsed content (default: geolocate) 130 | field (str): Item attribute from which to obtain the first address to 131 | operate on (default: 'content') 132 | 133 | Returns: 134 | dict: an item with math result 135 | 136 | Examples: 137 | >>> conf = {'type': 'currency'} 138 | >>> geolocate = next(pipe({'content': 'INR'}, conf=conf))['geolocate'] 139 | >>> geolocate['country'] == 'India' 140 | True 141 | >>> address = '123 Bakersville St., London' 142 | >>> kwargs = {'field': 'address', 'assign': 'result'} 143 | >>> geolocate = next(pipe({'address': address}, **kwargs))['result'] 144 | >>> geolocate['country'] == 'United States' 145 | True 146 | """ 147 | return parser(*args, **kwargs) 148 | -------------------------------------------------------------------------------- /riko/modules/hash.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # vim: sw=4:ts=4:expandtab 3 | """ 4 | riko.modules.hash 5 | ~~~~~~~~~~~~~~~~~ 6 | Provides functions for hashing text. 7 | 8 | Note: If the PYTHONHASHSEED environment variable is set to an integer value, 9 | it is used as a fixed seed for generating the hash. Its purpose is to allow 10 | repeatable hashing across python processes and versions. The integer must be a 11 | decimal number in the range [0, 4294967295]. 12 | 13 | Specifying the value 0 will disable hash randomization. If this variable is set 14 | to `random`, a random value is used to seed the hashes. 
def parser(word, _, skip=False, **kwargs):
    """Hash a single text value.

    Args:
        word (str): The text to hash (the previous doc incorrectly named
            this parameter `item`)
        _ (None): Ignored.
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        assign (str): Attribute to assign parsed content (default: hash)
        stream (dict): The original item

    Returns:
        int: the unsigned 32-bit hash of `word` (or the original stream
            when `skip` is set)

    Examples:
        >>> _hash = ctypes.c_uint(hash('hello world')).value
        >>> item = {'content': 'hello world'}
        >>> kwargs = {'stream': item}
        >>> parser(item['content'], None, **kwargs) == _hash
        True
    """
    if skip:
        return kwargs["stream"]

    # Map Python's signed hash() result onto an unsigned 32-bit value so the
    # output is stable regardless of sign.
    return ctypes.c_uint(hash(word)).value
71 | 72 | Args: 73 | item (dict): The entry to process 74 | kwargs (dict): The keyword arguments passed to the wrapper 75 | 76 | Kwargs: 77 | assign (str): Attribute to assign parsed content (default: hash) 78 | field (str): Item attribute to operate on (default: 'content') 79 | 80 | Returns: 81 | Deferred: twisted.internet.defer.Deferred item with hashed content 82 | 83 | Examples: 84 | >>> from riko.bado import react 85 | >>> from riko.bado.mock import FakeReactor 86 | >>> 87 | >>> _hash = ctypes.c_uint(hash('hello world')).value 88 | >>> 89 | >>> def run(reactor): 90 | ... callback = lambda x: print(next(x)['hash'] == _hash) 91 | ... d = async_pipe({'content': 'hello world'}) 92 | ... return d.addCallbacks(callback, logger.error) 93 | >>> 94 | >>> try: 95 | ... react(run, _reactor=FakeReactor()) 96 | ... except SystemExit: 97 | ... pass 98 | ... 99 | True 100 | """ 101 | return parser(*args, **kwargs) 102 | 103 | 104 | @processor(**OPTS) 105 | def pipe(*args, **kwargs): 106 | """A processor that hashes the field of an item. 
107 | 108 | Args: 109 | item (dict): The entry to process 110 | kwargs (dict): The keyword arguments passed to the wrapper 111 | 112 | Kwargs: 113 | assign (str): Attribute to assign parsed content (default: hash) 114 | field (str): Item attribute to operate on (default: 'content') 115 | 116 | Yields: 117 | dict: an item with hashed content 118 | 119 | Examples: 120 | >>> _hash = ctypes.c_uint(hash('hello world')).value 121 | >>> next(pipe({'content': 'hello world'}))['hash'] == _hash 122 | True 123 | >>> _hash = ctypes.c_uint(hash('greeting')).value 124 | >>> kwargs = {'field': 'title', 'assign': 'result'} 125 | >>> next(pipe({'title': 'greeting'}, **kwargs))['result'] == _hash 126 | True 127 | """ 128 | return parser(*args, **kwargs) 129 | -------------------------------------------------------------------------------- /riko/modules/itembuilder.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # vim: sw=4:ts=4:expandtab 3 | """ 4 | riko.modules.itembuilder 5 | ~~~~~~~~~~~~~~~~~~~~~~~~ 6 | Provides functions for creating a single-item data source 7 | 8 | With the Item Builder module, you can create a single-item data source by 9 | assigning values to one or more item attributes. The module lets you assign 10 | a value to an attribute. 11 | 12 | Item Builder's strength is its ability to restructure and rename multiple 13 | elements in a stream. When Item Builder is fed an input stream, the assigned 14 | values can be existing attributes of the stream. These attributes can be 15 | reassigned or used to create entirely new attributes. 
16 | 17 | Examples: 18 | basic usage:: 19 | 20 | >>> from riko.modules.itembuilder import pipe 21 | >>> 22 | >>> attrs = {'key': 'title', 'value': 'the title'} 23 | >>> next(pipe(conf={'attrs': attrs}))['title'] == 'the title' 24 | True 25 | 26 | Attributes: 27 | OPTS (dict): The default pipe options 28 | DEFAULTS (dict): The default parser options 29 | """ 30 | 31 | from . import processor 32 | import pygogo as gogo 33 | from riko.dotdict import DotDict 34 | 35 | OPTS = {"listize": True, "extract": "attrs", "ftype": "none"} 36 | logger = gogo.Gogo(__name__, monolog=True).logger 37 | 38 | 39 | def parser(_, attrs, skip=False, **kwargs): 40 | """Parses the pipe content 41 | 42 | Args: 43 | _ (None): Ignored 44 | attrs (List[dict]): Attributes 45 | skip (bool): Don't parse the content 46 | kwargs (dict): Keyword arguments 47 | 48 | Kwargs: 49 | stream (dict): The original item 50 | 51 | Returns: 52 | Iter(dict): The stream of items 53 | 54 | Examples: 55 | >>> from meza.fntools import Objectify 56 | >>> attrs = [ 57 | ... {'key': 'title', 'value': 'the title'}, 58 | ... {'key': 'desc', 'value': 'the desc'}] 59 | >>> result = parser(None, map(Objectify, attrs)) 60 | >>> result == {'title': 'the title', 'desc': 'the desc'} 61 | True 62 | """ 63 | items = ((a.key, a.value) for a in attrs) 64 | return kwargs["stream"] if skip else DotDict(items) 65 | 66 | 67 | @processor(isasync=True, **OPTS) 68 | def async_pipe(*args, **kwargs): 69 | """A source that asynchronously builds an item. 70 | 71 | Args: 72 | item (dict): The entry to process 73 | kwargs (dict): The keyword arguments passed to the wrapper 74 | 75 | Kwargs: 76 | conf (dict): The pipe configuration. Must contain the key 'attrs'. 77 | 78 | attrs (dict): can be either a dict or list of dicts. Must contain 79 | the keys 'key' and 'value'. 
80 | 81 | key (str): the attribute name 82 | value (str): the attribute value 83 | 84 | Returns: 85 | dict: twisted.internet.defer.Deferred an iterator of items 86 | 87 | Examples: 88 | >>> from riko.bado import react 89 | >>> from riko.bado.mock import FakeReactor 90 | >>> 91 | >>> def run(reactor): 92 | ... callback = lambda x: print(next(x)['title']) 93 | ... attrs = [ 94 | ... {'key': 'title', 'value': 'the title'}, 95 | ... {'key': 'desc.content', 'value': 'the desc'}] 96 | ... 97 | ... d = async_pipe(conf={'attrs': attrs}) 98 | ... return d.addCallbacks(callback, logger.error) 99 | >>> 100 | >>> try: 101 | ... react(run, _reactor=FakeReactor()) 102 | ... pass 103 | ... except SystemExit: 104 | ... pass 105 | ... 106 | the title 107 | """ 108 | return parser(*args, **kwargs) 109 | 110 | 111 | @processor(**OPTS) 112 | def pipe(*args, **kwargs): 113 | """A source that builds an item. 114 | 115 | Args: 116 | item (dict): The entry to process 117 | kwargs (dict): The keyword arguments passed to the wrapper 118 | 119 | Kwargs: 120 | conf (dict): The pipe configuration. Must contain the key 'attrs'. 121 | 122 | attrs (dict): can be either a dict or list of dicts. Must contain 123 | the keys 'key' and 'value'. 124 | 125 | key (str): the attribute name 126 | value (str): the attribute value 127 | 128 | Yields: 129 | dict: an item 130 | 131 | Examples: 132 | >>> attrs = [ 133 | ... {'key': 'title', 'value': 'the title'}, 134 | ... {'key': 'desc.content', 'value': 'the desc'}] 135 | >>> next(pipe(conf={'attrs': attrs})) == { 136 | ... 
'title': 'the title', 'desc': {'content': 'the desc'}} 137 | True 138 | """ 139 | return parser(*args, **kwargs) 140 | -------------------------------------------------------------------------------- /riko/modules/reverse.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # vim: sw=4:ts=4:expandtab 3 | """ 4 | riko.modules.reverse 5 | ~~~~~~~~~~~~~~~~~~~~ 6 | Provides functions for flipping the order of all items in a stream. 7 | 8 | Examples: 9 | basic usage:: 10 | 11 | >>> from riko.modules.reverse import pipe 12 | >>> 13 | >>> next(pipe({'x': x} for x in range(5))) == {'x': 4} 14 | True 15 | 16 | Attributes: 17 | OPTS (dict): The default pipe options 18 | DEFAULTS (dict): The default parser options 19 | """ 20 | 21 | from . import operator 22 | import pygogo as gogo 23 | 24 | # disable `dictize` since we do not need to access the configuration 25 | OPTS = {"dictize": False} 26 | logger = gogo.Gogo(__name__, monolog=True).logger 27 | 28 | 29 | def parser(stream, objconf, tuples, **kwargs): 30 | """Parses the pipe content 31 | 32 | Args: 33 | stream (Iter[dict]): The source. Note: this shares the `tuples` 34 | iterator, so consuming it will consume `tuples` as well. 35 | 36 | objconf (obj): the item independent configuration (an Objectify 37 | instance). 38 | 39 | tuples (Iter[(dict, obj)]): Iterable of tuples of (item, objconf) 40 | `item` is an element in the source stream and `objconf` is the item 41 | configuration (an Objectify instance). Note: this shares the 42 | `stream` iterator, so consuming it will consume `stream` as well. 43 | 44 | kwargs (dict): Keyword arguments. 
45 | 46 | Returns: 47 | Iter(dict): The output stream 48 | 49 | Examples: 50 | >>> from itertools import repeat 51 | >>> 52 | >>> kwargs = {} 53 | >>> stream = ({'x': x} for x in range(5)) 54 | >>> tuples = zip(stream, repeat(None)) 55 | >>> next(parser(stream, None, tuples, **kwargs)) == {'x': 4} 56 | True 57 | """ 58 | return reversed(list(stream)) 59 | 60 | 61 | @operator(isasync=True, **OPTS) 62 | def async_pipe(*args, **kwargs): 63 | """An operator that asynchronously reverses the order of source items in 64 | a stream. Note that this pipe is not lazy. 65 | 66 | Args: 67 | items (Iter[dict]): The source. 68 | kwargs (dict): The keyword arguments passed to the wrapper 69 | 70 | Returns: 71 | Deferred: twisted.internet.defer.Deferred iterator of the source 72 | items in reverse order 73 | 74 | Examples: 75 | >>> from riko.bado import react 76 | >>> from riko.bado.mock import FakeReactor 77 | >>> 78 | >>> def run(reactor): 79 | ... callback = lambda x: print(next(x) == {'x': 4}) 80 | ... items = ({'x': x} for x in range(5)) 81 | ... d = async_pipe(items) 82 | ... return d.addCallbacks(callback, logger.error) 83 | >>> 84 | >>> try: 85 | ... react(run, _reactor=FakeReactor()) 86 | ... except SystemExit: 87 | ... pass 88 | ... 89 | True 90 | """ 91 | return parser(*args, **kwargs) 92 | 93 | 94 | @operator(**OPTS) 95 | def pipe(*args, **kwargs): 96 | """An operator that eagerly reverses the order of source items in a stream. 97 | 98 | Args: 99 | items (Iter[dict]): The source.
100 | kwargs (dict): The keyword arguments passed to the wrapper 101 | 102 | Yields: 103 | dict: an item 104 | 105 | Examples: 106 | >>> items = ({'x': x} for x in range(5)) 107 | >>> next(pipe(items)) == {'x': 4} 108 | True 109 | """ 110 | return parser(*args, **kwargs) 111 | -------------------------------------------------------------------------------- /riko/modules/simplemath.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # vim: sw=4:ts=4:expandtab 3 | """ 4 | riko.modules.simplemath 5 | ~~~~~~~~~~~~~~~~~~~~~~~ 6 | Provides functions for performing simple mathematical operations, e.g., 7 | addition, subtraction, multiplication, division, modulo, averages, etc. 8 | 9 | Examples: 10 | basic usage:: 11 | 12 | >>> from decimal import Decimal 13 | >>> from riko.modules.simplemath import pipe 14 | >>> 15 | >>> conf = {'op': 'divide', 'other': '5'} 16 | >>> next(pipe({'content': '10'}, conf=conf))['simplemath'] 17 | Decimal('2') 18 | 19 | Attributes: 20 | OPTS (dict): The default pipe options 21 | DEFAULTS (dict): The default parser options 22 | """ 23 | import operator 24 | 25 | from . 
import processor 26 | import pygogo as gogo 27 | 28 | OPTS = {"ftype": "decimal", "ptype": "decimal", "field": "content"} 29 | DEFAULTS = {} 30 | logger = gogo.Gogo(__name__, monolog=True).logger 31 | 32 | 33 | def mean(*nums): 34 | try: 35 | return sum(nums) / len(nums) 36 | except ZeroDivisionError: 37 | return float("inf") 38 | 39 | 40 | OPS = { 41 | "add": operator.add, 42 | "subtract": operator.sub, 43 | "multiply": operator.mul, 44 | "mean": mean, 45 | "divide": operator.truediv, 46 | "floor": operator.floordiv, 47 | "modulo": operator.mod, 48 | "power": operator.pow, 49 | } 50 | 51 | 52 | def parser(num, objconf, skip=False, **kwargs): 53 | """Parses the pipe content 54 | 55 | Args: 56 | num (Decimal): The first number to operate on 57 | objconf (obj): The pipe configuration (an Objectify instance) 58 | skip (bool): Don't parse the content 59 | 60 | Returns: 61 | dict: The formatted item 62 | 63 | Examples: 64 | >>> from meza.fntools import Objectify 65 | >>> conf = {'op': 'divide', 'other': 4} 66 | >>> objconf = Objectify(conf) 67 | >>> parser(10, objconf, conf=conf) 68 | 2.5 69 | """ 70 | operation = OPS[kwargs["conf"]["op"]] 71 | return kwargs["stream"] if skip else operation(num, objconf.other) 72 | 73 | 74 | @processor(DEFAULTS, isasync=True, **OPTS) 75 | def async_pipe(*args, **kwargs): 76 | """A processor module that asynchronously performs basic arithmetic, such 77 | as addition and subtraction. 78 | 79 | Args: 80 | item (dict): The entry to process 81 | kwargs (dict): The keyword arguments passed to the wrapper 82 | 83 | Kwargs: 84 | conf (dict): The pipe configuration. Must contain the keys 'other' 85 | and 'op'. 86 | 87 | other (number): The second number to operate on. 88 | op (str): The math operation. Must be one of 'add', 89 | 'subtract', 'multiply', 'divide', 'modulo', 90 | 'floor', 'power', or 'mean'.
91 | 92 | assign (str): Attribute to assign parsed content (default: simplemath) 93 | field (str): Item attribute from which to obtain the first number to 94 | operate on (default: 'content') 95 | 96 | Returns: 97 | Deferred: twisted.internet.defer.Deferred item with the math result 98 | 99 | Examples: 100 | >>> from riko.bado import react 101 | >>> from riko.bado.mock import FakeReactor 102 | >>> 103 | >>> def run(reactor): 104 | ... callback = lambda x: print(next(x)['simplemath']) 105 | ... conf = {'op': 'divide', 'other': '5'} 106 | ... d = async_pipe({'content': '10'}, conf=conf) 107 | ... return d.addCallbacks(callback, logger.error) 108 | >>> 109 | >>> try: 110 | ... react(run, _reactor=FakeReactor()) 111 | ... except SystemExit: 112 | ... pass 113 | ... 114 | 2 115 | """ 116 | return parser(*args, **kwargs) 117 | 118 | 119 | @processor(DEFAULTS, **OPTS) 120 | def pipe(*args, **kwargs): 121 | """A processor module that performs basic arithmetic, such as addition and 122 | subtraction. 123 | 124 | Args: 125 | item (dict): The entry to process 126 | kwargs (dict): The keyword arguments passed to the wrapper 127 | 128 | Kwargs: 129 | conf (dict): The pipe configuration. Must contain the keys 'other' 130 | and 'op'. 131 | 132 | other (number): The second number to operate on. 133 | op (str): The math operation. Must be one of 'add', 134 | 'subtract', 'multiply', 'divide', 'modulo', 135 | 'floor', 'power', or 'mean'.
136 | 137 | assign (str): Attribute to assign parsed content (default: simplemath) 138 | field (str): Item attribute from which to obtain the first number to 139 | operate on (default: 'content') 140 | 141 | Returns: 142 | dict: an item with math result 143 | 144 | Examples: 145 | >>> from decimal import Decimal 146 | >>> conf = {'op': 'divide', 'other': '5'} 147 | >>> next(pipe({'content': '10'}, conf=conf))['simplemath'] 148 | Decimal('2') 149 | >>> kwargs = {'conf': conf, 'field': 'num', 'assign': 'result'} 150 | >>> next(pipe({'num': '10'}, **kwargs))['result'] 151 | Decimal('2') 152 | """ 153 | return parser(*args, **kwargs) 154 | -------------------------------------------------------------------------------- /riko/modules/slugify.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # vim: sw=4:ts=4:expandtab 3 | """ 4 | riko.modules.slugify 5 | ~~~~~~~~~~~~~~~~~~~~ 6 | Provides functions for slugifying text. 7 | 8 | Examples: 9 | basic usage:: 10 | 11 | >>> from riko.modules.slugify import pipe 12 | >>> 13 | >>> next(pipe({'content': 'hello world'}))['slugify'] == 'hello-world' 14 | True 15 | 16 | Attributes: 17 | OPTS (dict): The default pipe options 18 | DEFAULTS (dict): The default parser options 19 | """ 20 | import pygogo as gogo 21 | 22 | from slugify import slugify 23 | from . import processor 24 | 25 | OPTS = {"ftype": "text", "extract": "separator", "field": "content"} 26 | DEFAULTS = {"separator": "-"} 27 | logger = gogo.Gogo(__name__, monolog=True).logger 28 | 29 | 30 | def parser(word, separator, skip=False, **kwargs): 31 | """Parsers the pipe content 32 | 33 | Args: 34 | word (str): The string to transform 35 | separator (str): The slug separator. 
36 | skip (bool): Don't parse the content 37 | kwargs (dict): Keyword arguments 38 | 39 | Kwargs: 40 | assign (str): Attribute to assign parsed content (default: slugify) 41 | stream (dict): The original item 42 | 43 | Returns: 44 | dict: The item 45 | 46 | Examples: 47 | >>> from meza.fntools import Objectify 48 | >>> 49 | >>> item = {'content': 'hello world'} 50 | >>> kwargs = {'stream': item} 51 | >>> parser(item['content'], '-', **kwargs) == 'hello-world' 52 | True 53 | """ 54 | if skip: 55 | parsed = kwargs["stream"] 56 | else: 57 | parsed = slugify(word.strip(), separator=separator) 58 | 59 | return parsed 60 | 61 | 62 | @processor(DEFAULTS, isasync=True, **OPTS) 63 | def async_pipe(*args, **kwargs): 64 | """A processor module that asynchronously slugifies the field of an item. 65 | 66 | Args: 67 | item (dict): The entry to process 68 | kwargs (dict): The keyword arguments passed to the wrapper 69 | 70 | Kwargs: 71 | assign (str): Attribute to assign parsed content (default: slugify) 72 | field (str): Item attribute to operate on (default: 'content') 73 | 74 | Returns: 75 | Deferred: twisted.internet.defer.Deferred item with slugified content 76 | 77 | Examples: 78 | >>> from riko.bado import react 79 | >>> from riko.bado.mock import FakeReactor 80 | >>> 81 | >>> def run(reactor): 82 | ... callback = lambda x: print(next(x)['slugify'] == 'hello-world') 83 | ... d = async_pipe({'content': 'hello world'}) 84 | ... return d.addCallbacks(callback, logger.error) 85 | >>> 86 | >>> try: 87 | ... react(run, _reactor=FakeReactor()) 88 | ... except SystemExit: 89 | ... pass 90 | ... 91 | True 92 | """ 93 | return parser(*args, **kwargs) 94 | 95 | 96 | @processor(DEFAULTS, **OPTS) 97 | def pipe(*args, **kwargs): 98 | """A processor that slugifies the field of an item. 99 | 100 | Args: 101 | item (dict): The entry to process 102 | kwargs (dict): The keyword arguments passed to the wrapper 103 | 104 | Kwargs: 105 | conf (dict): The pipe configuration. 
May contain the key 'separator'. 106 | separator (str): The slug separator (default: '-') 107 | 108 | assign (str): Attribute to assign parsed content (default: slugify) 109 | field (str): Item attribute to operate on (default: 'content') 110 | 111 | Yields: 112 | dict: an item with slugified content 113 | 114 | Examples: 115 | >>> next(pipe({'content': 'hello world'}))['slugify'] == 'hello-world' 116 | True 117 | >>> slugified = 'hello_world' 118 | >>> conf = {'separator': '_'} 119 | >>> item = {'title': 'hello world'} 120 | >>> kwargs = {'conf': conf, 'field': 'title', 'assign': 'result'} 121 | >>> next(pipe(item, **kwargs))['result'] == slugified 122 | True 123 | """ 124 | return parser(*args, **kwargs) 125 | -------------------------------------------------------------------------------- /riko/modules/split.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # vim: sw=4:ts=4:expandtab 3 | """ 4 | riko.modules.split 5 | ~~~~~~~~~~~~~~~~~~ 6 | Provides functions for splitting a stream into identical copies 7 | 8 | Use split when you want to perform different operations on data from the same 9 | stream. The Union module is the reverse of Split, it merges multiple input 10 | streams into a single combined stream. 11 | 12 | Examples: 13 | basic usage:: 14 | 15 | >>> from riko.modules.split import pipe 16 | >>> 17 | >>> stream1, stream2 = pipe({'x': x} for x in range(5)) 18 | >>> next(stream1) == {'x': 0} 19 | True 20 | 21 | Attributes: 22 | OPTS (dict): The default pipe options 23 | DEFAULTS (dict): The default parser options 24 | """ 25 | 26 | from copy import deepcopy 27 | 28 | from . 
import operator 29 | import pygogo as gogo 30 | 31 | OPTS = {"extract": "splits", "ptype": "int", "objectify": False} 32 | DEFAULTS = {"splits": 2} 33 | logger = gogo.Gogo(__name__, monolog=True).logger 34 | 35 | 36 | def parser(stream, splits, tuples, **kwargs): 37 | """Parses the pipe content 38 | 39 | Args: 40 | stream (Iter[dict]): The source stream. Note: this shares the `tuples` 41 | iterator, so consuming it will consume `tuples` as well. 42 | 43 | splits (int): the number of copies to create. 44 | 45 | tuples (Iter[(dict, obj)]): Iterable of tuples of (item, splits) 46 | `item` is an element in the source stream (a DotDict instance) 47 | and `splits` is an int. Note: this shares the `stream` iterator, 48 | so consuming it will consume `stream` as well. 49 | 50 | kwargs (dict): Keyword arguments. 51 | 52 | Yields: 53 | Iter(dict): a stream of items 54 | 55 | Examples: 56 | >>> from itertools import repeat 57 | >>> 58 | >>> conf = {'splits': 3} 59 | >>> kwargs = {'conf': conf} 60 | >>> stream = (({'x': x}) for x in range(5)) 61 | >>> tuples = zip(stream, repeat(conf['splits'])) 62 | >>> streams = parser(stream, conf['splits'], tuples, **kwargs) 63 | >>> next(next(streams)) == {'x': 0} 64 | True 65 | """ 66 | source = list(stream) 67 | 68 | # deepcopy each item so that each split is independent 69 | for num in range(splits): 70 | yield map(deepcopy, source) 71 | 72 | 73 | @operator(DEFAULTS, isasync=True, **OPTS) 74 | def async_pipe(*args, **kwargs): 75 | """An operator that asynchronously and eagerly splits a stream into identical 76 | copies. Note that this pipe is not lazy. 77 | 78 | Args: 79 | items (Iter[dict]): The source stream. 80 | kwargs (dict): The keyword arguments passed to the wrapper 81 | 82 | Kwargs: 83 | conf (dict): The pipe configuration. May contain the key 'splits'. 84 | 85 | splits (int): the number of copies to create (default: 2). 
86 | 87 | Returns: 88 | Deferred: twisted.internet.defer.Deferred iterable of streams 89 | 90 | Examples: 91 | >>> from riko.bado import react 92 | >>> from riko.bado.mock import FakeReactor 93 | >>> 94 | >>> def run(reactor): 95 | ... callback = lambda x: print(next(next(x)) == {'x': 0}) 96 | ... d = async_pipe({'x': x} for x in range(5)) 97 | ... return d.addCallbacks(callback, logger.error) 98 | >>> 99 | >>> try: 100 | ... react(run, _reactor=FakeReactor()) 101 | ... except SystemExit: 102 | ... pass 103 | ... 104 | True 105 | """ 106 | return parser(*args, **kwargs) 107 | 108 | 109 | @operator(DEFAULTS, **OPTS) 110 | def pipe(*args, **kwargs): 111 | """An operator that eagerly splits a stream into identical copies. 112 | Note that this pipe is not lazy. 113 | 114 | Args: 115 | items (Iter[dict]): The source stream. 116 | kwargs (dict): The keyword arguments passed to the wrapper 117 | 118 | Kwargs: 119 | conf (dict): The pipe configuration. May contain the key 'splits'. 120 | 121 | splits (int): the number of copies to create (default: 2). 122 | 123 | Yields: 124 | Iter(dict): a stream of items 125 | 126 | Examples: 127 | >>> items = [{'x': x} for x in range(5)] 128 | >>> stream1, stream2 = pipe(items) 129 | >>> next(stream1) == {'x': 0} 130 | True 131 | >>> len(list(pipe(items, conf={'splits': '3'}))) 132 | 3 133 | """ 134 | return parser(*args, **kwargs) 135 | -------------------------------------------------------------------------------- /riko/modules/strconcat.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # vim: sw=4:ts=4:expandtab 3 | """ 4 | riko.modules.strconcat 5 | ~~~~~~~~~~~~~~~~~~~~~~ 6 | Provides functions for concatenating strings (aka stringbuilder). 7 | 8 | Useful when you need to build a string from multiple substrings, some coded 9 | into the pipe, other parts supplied when the pipe is run. 
10 | 11 | Examples: 12 | basic usage:: 13 | 14 | >>> from riko.modules.strconcat import pipe 15 | >>> 16 | >>> item = {'word': 'hello'} 17 | >>> part = [{'subkey': 'word'}, {'value': ' world'}] 18 | >>> next(pipe(item, conf={'part': part}))['strconcat'] == 'hello world' 19 | True 20 | 21 | Attributes: 22 | OPTS (dict): The default pipe options 23 | DEFAULTS (dict): The default parser options 24 | """ 25 | import pygogo as gogo 26 | 27 | from . import processor 28 | 29 | OPTS = {"listize": True, "extract": "part"} 30 | logger = gogo.Gogo(__name__, monolog=True).logger 31 | 32 | 33 | def parser(_, parts, skip=False, **kwargs): 34 | """Parses the pipe content 35 | 36 | Args: 37 | _ (dict): The item (ignored) 38 | parts (List[str]): The content to concatenate 39 | skip (bool): Don't parse the content 40 | kwargs (dict): Keyword arguments 41 | 42 | Kwargs: 43 | stream (dict): The original item 44 | 45 | Returns: 46 | str: The concatenated string 47 | 48 | Examples: 49 | >>> parser(None, ['one', 'two']) == 'onetwo' 50 | True 51 | """ 52 | if skip: 53 | parsed = kwargs["stream"] 54 | else: 55 | parsed = "".join(str(p) for p in parts if p) 56 | 57 | return parsed 58 | 59 | 60 | @processor(isasync=True, **OPTS) 61 | def async_pipe(*args, **kwargs): 62 | """A processor module that asynchronously concatenates strings. 63 | 64 | Args: 65 | item (dict): The entry to process 66 | kwargs (dict): The keyword arguments passed to the wrapper 67 | 68 | Kwargs: 69 | conf (dict): The pipe configuration. Must contain the key 'part'. 70 | 71 | part (dict): can be either a dict or list of dicts. Must contain 72 | one of the following keys: 'value', 'subkey', or 'terminal'. 
73 | 74 | value (str): The substring value 75 | subkey (str): The item attribute from which to obtain a 76 | substring 77 | 78 | terminal (str): The id of a pipe from which to obtain a 79 | substring 80 | 81 | assign (str): Attribute to assign parsed content (default: strconcat) 82 | 83 | Returns: 84 | Deferred: twisted.internet.defer.Deferred item with concatenated content 85 | 86 | Examples: 87 | >>> from riko.bado import react 88 | >>> from riko.bado.mock import FakeReactor 89 | >>> 90 | >>> def run(reactor): 91 | ... callback = lambda x: print(next(x)['strconcat']) 92 | ... item = {'title': 'Hello world'} 93 | ... part = [{'subkey': 'title'}, {'value': 's'}] 94 | ... d = async_pipe(item, conf={'part': part}) 95 | ... return d.addCallbacks(callback, logger.error) 96 | >>> 97 | >>> try: 98 | ... react(run, _reactor=FakeReactor()) 99 | ... except SystemExit: 100 | ... pass 101 | ... 102 | Hello worlds 103 | """ 104 | return parser(*args, **kwargs) 105 | 106 | 107 | @processor(**OPTS) 108 | def pipe(*args, **kwargs): 109 | """A processor that concatenates strings. 110 | 111 | Args: 112 | item (dict): The entry to process 113 | kwargs (dict): The keyword arguments passed to the wrapper 114 | 115 | Kwargs: 116 | conf (dict): The pipe configuration. Must contain the key 'part'. 117 | 118 | part (dict): can be either a dict or list of dicts. Must contain 119 | one of the following keys: 'value', 'subkey', or 'terminal'. 120 | 121 | value (str): The substring value 122 | subkey (str): The item attribute from which to obtain a 123 | substring 124 | 125 | terminal (str): The id of a pipe from which to obtain a 126 | substring 127 | 128 | assign (str): Attribute to assign parsed content (default: strconcat) 129 | 130 | Yields: 131 | dict: an item with concatenated content 132 | 133 | Examples: 134 | >>> item = {'img': {'src': 'http://www.site.com'}} 135 | >>> part = [ 136 | ... {'value': ''} 137 | ... 
] 138 | >>> conf = {'part': part} 139 | >>> resp = '' 140 | >>> next(pipe(item, conf=conf))['strconcat'] == resp 141 | True 142 | >>> next(pipe(item, conf=conf, assign='result'))['result'] == resp 143 | True 144 | """ 145 | return parser(*args, **kwargs) 146 | -------------------------------------------------------------------------------- /riko/modules/substr.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # vim: sw=4:ts=4:expandtab 3 | """ 4 | riko.modules.substr 5 | ~~~~~~~~~~~~~~~~~~~ 6 | Provides functions for obtaining a portion of a string. 7 | 8 | You enter two numbers to tell the module the starting character position and 9 | the length of the resulting substring. If your input string is "ABCDEFG", then 10 | a From value of 2 and length of 4 gives you a resulting string of "CDEF". 11 | Notice that the first character in the original string is 0, not 1. 12 | 13 | If you enter too long a length, the module just returns a substring to the end 14 | of the input string, so if you enter a From of 3 and a length of 100, you'll 15 | get a result of "DEFG". 16 | Examples: 17 | basic usage:: 18 | 19 | >>> from riko.modules.substr import pipe 20 | >>> 21 | >>> conf = {'start': '3', 'length': '4'} 22 | >>> item = {'content': 'hello world'} 23 | >>> next(pipe(item, conf=conf))['substr'] == 'lo w' 24 | True 25 | 26 | Attributes: 27 | OPTS (dict): The default pipe options 28 | DEFAULTS (dict): The default parser options 29 | """ 30 | 31 | from . 
def parser(word, objconf, skip=False, **kwargs):
    """Return a slice of `word` per the pipe configuration.

    Args:
        word (str): The string to slice
        objconf (obj): The pipe configuration (an Objectify instance with
            `start` and `length` attributes)
        skip (bool): Don't parse the content (return the original item)
        kwargs (dict): Keyword arguments

    Kwargs:
        assign (str): Attribute to assign parsed content (default: substr)
        stream (dict): The original item

    Returns:
        str: the requested substring (or the original item when `skip`)
    """
    if skip:
        return kwargs["stream"]

    begin = objconf.start

    # A length of 0 (the default) means "slice to the end of the string".
    stop = begin + objconf.length if objconf.length else None
    return word[begin:stop]
81 | 82 | start (int): starting position (default: 0) 83 | length (int): count of characters to return (default: 0, i.e., all) 84 | 85 | assign (str): Attribute to assign parsed content (default: substr) 86 | field (str): Item attribute to operate on (default: 'content') 87 | 88 | Returns: 89 | Deferred: twisted.internet.defer.Deferred item with transformed content 90 | 91 | Examples: 92 | >>> from riko.bado import react 93 | >>> from riko.bado.mock import FakeReactor 94 | >>> 95 | >>> def run(reactor): 96 | ... callback = lambda x: print(next(x)['substr']) 97 | ... conf = {'start': '3', 'length': '4'} 98 | ... d = async_pipe({'content': 'hello world'}, conf=conf) 99 | ... return d.addCallbacks(callback, logger.error) 100 | >>> 101 | >>> try: 102 | ... react(run, _reactor=FakeReactor()) 103 | ... except SystemExit: 104 | ... pass 105 | ... 106 | lo w 107 | """ 108 | return parser(*args, **kwargs) 109 | 110 | 111 | @processor(**OPTS) 112 | def pipe(*args, **kwargs): 113 | """A processor that returns a substring of a field of an item. 114 | 115 | Args: 116 | item (dict): The entry to process 117 | kwargs (dict): The keyword arguments passed to the wrapper 118 | 119 | Kwargs: 120 | conf (dict): The pipe configuration. May contain the keys 'start' or 121 | 'length'. 
def parser(stream, objconf, tuples, **kwargs):
    """Sum the `sum_key` field of every item in the stream.

    Args:
        stream (Iter[dict]): The source. Note: this shares the `tuples`
            iterator, so consuming it will consume `tuples` as well.

        objconf (obj): The pipe configuration (an Objectify instance with
            `sum_key` and `group_key` attributes)

        tuples (Iter[(dict, obj)]): Iterable of tuples of (item, objconf).
            Note: this shares the `stream` iterator, so consuming it will
            consume `stream` as well.

        kwargs (dict): Keyword arguments.

    Kwargs:
        assign (str): Attribute to assign the total (ungrouped mode only)

    Returns:
        mixed: a single {assign: total} dict when no `group_key` is
            configured, otherwise an iterable of one {group: total} dict
            per group
    """
    def total(entries):
        # Decimal avoids binary-float rounding artifacts when summing.
        return sum(Decimal(entry[objconf.sum_key]) for entry in entries)

    group_key = objconf.group_key

    if not group_key:
        return {kwargs["assign"]: total(stream)}

    by_group = itemgetter(group_key)

    # groupby only merges *adjacent* runs, hence the sort by the same key.
    ordered = sorted(stream, key=by_group)
    grouped = it.groupby(ordered, by_group)
    return ({name: total(members)} for name, members in grouped)
95 | 96 | Args: 97 | items (Iter[dict]): The source. 98 | kwargs (dict): The keyword arguments passed to the wrapper 99 | 100 | Kwargs: 101 | conf (dict): The pipe configuration. May contain the keys 'sum_key' or 102 | 'group_key'. 103 | 104 | sum_key (str): Item attribute to sum. (default: 'content'). 105 | 106 | group_key (str): Item attribute to sum by. This will group items 107 | in the stream by the given key and report a sum for each 108 | group (default: None). 109 | 110 | assign (str): Attribute to assign parsed content. If `sum_key` is set, 111 | this is ignored and the group keys are used instead. (default: 112 | content) 113 | 114 | Returns: 115 | Deferred: twisted.internet.defer.Deferred iterator of the summed items 116 | 117 | Examples: 118 | >>> from riko.bado import react 119 | >>> from riko.bado.mock import FakeReactor 120 | >>> 121 | >>> def run(reactor): 122 | ... callback = lambda x: print(next(x) == {'sum': Decimal('10')}) 123 | ... items = ({'content': x} for x in range(5)) 124 | ... d = async_pipe(items) 125 | ... return d.addCallbacks(callback, logger.error) 126 | >>> 127 | >>> try: 128 | ... react(run, _reactor=FakeReactor()) 129 | ... except SystemExit: 130 | ... pass 131 | ... 132 | True 133 | """ 134 | return parser(*args, **kwargs) 135 | 136 | 137 | @operator(DEFAULTS, **OPTS) 138 | def pipe(*args, **kwargs): 139 | """An operator that eagerly sums fields of items in a stream. 140 | Note that this pipe is not lazy if `group_key` is specified. 141 | 142 | Args: 143 | items (Iter[dict]): The source. 144 | kwargs (dict): The keyword arguments passed to the wrapper 145 | 146 | Kwargs: 147 | conf (dict): The pipe configuration. May contain the keys 'sum_key' or 148 | 'group_key'. 149 | 150 | sum_key (str): Item attribute to sum. (default: 'content'). 151 | 152 | group_key (str): Item attribute to sum by. This will group items 153 | in the stream by the given key and report a sum for each 154 | group (default: None). 
def parser(stream, objconf, tuples, **kwargs):
    """Keep only the last `objconf.count` items of the stream.

    Args:
        stream (Iter[dict]): The source. Note: this shares the `tuples`
            iterator, so consuming it will consume `tuples` as well.

        objconf (obj): the item independent configuration (an Objectify
            instance with a `count` attribute)

        tuples (Iter[(dict, obj)]): Iterable of tuples of (item, objconf).
            Note: this shares the `stream` iterator, so consuming it will
            consume `stream` as well.

        kwargs (dict): Keyword arguments.

    Returns:
        deque(dict): the trailing items, oldest first
    """
    # A bounded deque discards items from the front as new ones arrive,
    # so exhausting the stream leaves exactly the last `count` items.
    trailing = deque(stream, maxlen=objconf.count)
    return trailing
@operator(**OPTS)
def pipe(*args, **kwargs):
    """An operator that keeps only the last N items of a stream.

    Args:
        items (Iter[dict]): The source.
        kwargs (dict): The keyword arguments passed to the wrapper

    Kwargs:
        conf (dict): The pipe configuration. Must contain the key 'count'.

            count (int): desired stream length

    Yields:
        dict: an item from the end of the stream
    """
    return parser(*args, **kwargs)
def parser(content, objconf, skip=False, **kwargs):
    """Split `content` into a stream of token items.

    Args:
        content (str): The content to tokenize
        objconf (obj): The pipe configuration (an Objectify instance with
            `delimiter`, `dedupe`, `sort`, and `token_key` attributes)
        skip (bool): Don't parse the content (return the original stream)
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item (returned unchanged when `skip`)

    Returns:
        Iter[dict]: The stream of items
    """
    if skip:
        return kwargs["stream"]

    # Strip each segment *before* filtering so whitespace-only segments
    # are dropped instead of yielding empty tokens (the old `if s` check
    # ran on the unstripped segment).
    stripped = (s.strip() for s in content.split(objconf.delimiter))
    tokens = [token for token in stripped if token]

    if objconf.dedupe:
        # dict.fromkeys dedupes while preserving input order; a plain set
        # would make the unsorted output order depend on hash
        # randomization and thus vary between runs.
        tokens = list(dict.fromkeys(tokens))

    if objconf.sort:
        tokens = sorted(tokens, key=str.lower)

    return ({objconf.token_key: token} for token in tokens)
@processor(DEFAULTS, **OPTS)
def pipe(*args, **kwargs):
    """A processor that splits a string into tokens by a delimiter.

    Args:
        item (dict): The entry to process
        kwargs (dict): The keyword arguments passed to the wrapper

    Kwargs:
        conf (dict): The pipe configuration. May contain the keys
            'delimiter', 'dedupe', 'sort', or 'token_key'.

            delimiter (str): the delimiter string (default: ',')
            dedupe (bool): Remove duplicates (default: False)
            sort (bool): Sort tokens (default: False)
            token_key (str): Attribute to assign individual tokens
                (default: content)

        assign (str): Attribute to assign parsed content (default:
            tokenizer)

        field (str): Item attribute from which to obtain the string to be
            tokenized (default: content)

        emit (bool): Return the stream as is and don't assign it to an
            item attribute (default: False)

    Returns:
        dict: an item with tokenized content
    """
    return parser(*args, **kwargs)
def parser(stream, objconf, tuples, **kwargs):
    """Take a bounded slice of the stream.

    Args:
        stream (Iter[dict]): The source. Note: this shares the `tuples`
            iterator, so consuming it will consume `tuples` as well.

        objconf (obj): the item independent configuration (an Objectify
            instance with `start` and `count` attributes)

        tuples (Iter[(dict, obj)]): Iterable of tuples of (item, objconf).
            Note: this shares the `stream` iterator, so consuming it will
            consume `stream` as well.

        kwargs (dict): Keyword arguments.

    Returns:
        Iter(dict): at most `count` items beginning at position `start`
    """
    first = objconf.start

    # islice simply stops early if the stream is shorter than requested.
    return islice(stream, first, first + objconf.count)
@operator(DEFAULTS, **OPTS)
def pipe(*args, **kwargs):
    """An operator that returns at most `count` items from a stream.

    Args:
        items (Iter[dict]): The source.
        kwargs (dict): The keyword arguments passed to the wrapper

    Kwargs:
        conf (dict): The pipe configuration. Must contain the key 'count'
            and may contain the key 'start'.

            count (int): desired stream length
            start (int): starting location (default: 0)

    Yields:
        dict: an item
    """
    return parser(*args, **kwargs)
def parser(content, objconf, skip=False, **kwargs):
    """Cast the content to the configured type.

    Args:
        content (scalar): The content to cast
        objconf (obj): The pipe configuration (an Objectify instance with
            a `type` attribute naming the target type)
        skip (bool): Don't parse the content (return the original item)
        kwargs (dict): Keyword arguments

    Kwargs:
        assign (str): Attribute to assign parsed content (default: typecast)
        stream (dict): The original item

    Returns:
        mixed: the casted value (or the original item when `skip`)
    """
    if skip:
        return kwargs["stream"]

    # `cast` (riko.utils) dispatches on the type name, e.g. 'text',
    # 'int', 'date', 'bool'.
    return cast(content, objconf.type)
69 | type (str): The object type to cast to (default: text) 70 | 71 | assign (str): Attribute to assign parsed content (default: typecast) 72 | field (str): Item attribute to operate on (default: 'content') 73 | 74 | Returns: 75 | Deferred: twisted.internet.defer.Deferred item with type casted content 76 | 77 | Examples: 78 | >>> from riko.bado import react 79 | >>> from riko.bado.mock import FakeReactor 80 | >>> 81 | >>> def run(reactor): 82 | ... callback = lambda x: print(next(x)['typecast']) 83 | ... d = async_pipe({'content': '1.0'}, conf={'type': 'int'}) 84 | ... return d.addCallbacks(callback, logger.error) 85 | >>> 86 | >>> try: 87 | ... react(run, _reactor=FakeReactor()) 88 | ... except SystemExit: 89 | ... pass 90 | ... 91 | 1 92 | """ 93 | return parser(*args, **kwargs) 94 | 95 | 96 | @processor(DEFAULTS, **OPTS) 97 | def pipe(*args, **kwargs): 98 | """A processor that parses a URL into its components. 99 | 100 | Args: 101 | item (dict): The entry to process 102 | kwargs (dict): The keyword arguments passed to the wrapper 103 | 104 | Kwargs: 105 | conf (dict): The pipe configuration. May contain the key 'type'. 
def parser(stream, objconf, tuples, **kwargs):
    """Lazily apply the user-supplied function to every stream item.

    Args:
        stream (Iter[dict]): The source. Note: this shares the `tuples`
            iterator, so consuming it will consume `tuples` as well.

        objconf (obj): the item independent configuration (unused here).

        tuples (Iter[(dict, obj)]): Iterable of tuples of (item, objconf).
            Note: this shares the `stream` iterator, so consuming it will
            consume `stream` as well.

        kwargs (dict): Keyword arguments.

    Kwargs:
        func (callable): User defined function to apply to each stream item.

    Returns:
        Iter(dict): The transformed stream
    """
    transform = kwargs["func"]

    # map is lazy: items are only transformed as the stream is consumed.
    return map(transform, stream)
def parser(stream, objconf, tuples, **kwargs):
    """Concatenate the source stream with every other configured stream.

    Args:
        stream (Iter[dict]): The source. Note: this shares the `tuples`
            iterator, so consuming it will consume `tuples` as well.

        objconf (obj): the item independent configuration (unused here).

        tuples (Iter[(dict, obj)]): Iterable of tuples of (item, objconf).
            Note: this shares the `stream` iterator, so consuming it will
            consume `stream` as well.

        kwargs (dict): Keyword arguments.

    Kwargs:
        others (List[Iter(dict)]): List of streams to join

    Returns:
        Iter(dict): the source items followed by the items of each other
            stream (flattened by `multiplex`)
    """
    merged_others = multiplex(kwargs["others"])
    return chain(stream, merged_others)
@operator(**OPTS)
def pipe(*args, **kwargs):
    """An operator that merges multiple streams into one.

    Args:
        items (Iter[dict]): The source.
        kwargs (dict): The keyword arguments passed to the wrapper

    Kwargs:
        others (List[Iter(dict)]): List of streams to merge with the source

    Yields:
        dict: an item from one of the merged streams
    """
    return parser(*args, **kwargs)
import operator 30 | 31 | OPTS = {} 32 | DEFAULTS = {"uniq_key": "content", "limit": 1024} 33 | logger = gogo.Gogo(__name__, monolog=True).logger 34 | 35 | 36 | def parser(stream, objconf, tuples, **kwargs): 37 | """Parses the pipe content 38 | 39 | Args: 40 | stream (Iter[dict]): The source. Note: this shares the `tuples` 41 | iterator, so consuming it will consume `tuples` as well. 42 | 43 | objconf (obj): The pipe configuration (an Objectify instance) 44 | 45 | tuples (Iter[(dict, obj)]): Iterable of tuples of (item, rules) 46 | `item` is an element in the source stream (a DotDict instance) 47 | and `rules` is the rule configuration (an Objectify instance). 48 | Note: this shares the `stream` iterator, so consuming it will 49 | consume `stream` as well. 50 | 51 | kwargs (dict): Keyword arguments. 52 | 53 | Yields: 54 | dict: The output 55 | 56 | Examples: 57 | >>> from itertools import repeat 58 | >>> from meza.fntools import Objectify 59 | >>> 60 | >>> conf = {'uniq_key': 'mod', 'limit': 256} 61 | >>> objconf = Objectify(conf) 62 | >>> kwargs = {'conf': conf} 63 | >>> stream = ({'x': x, 'mod': x % 2} for x in range(5)) 64 | >>> tuples = zip(stream, repeat(objconf)) 65 | >>> list(parser(stream, objconf, tuples, **kwargs)) == [ 66 | ... {'x': 0, 'mod': 0}, {'x': 1, 'mod': 1}] 67 | True 68 | """ 69 | key, limit = objconf.uniq_key, int(objconf.limit) 70 | seen = deque(maxlen=limit) 71 | 72 | for item in stream: 73 | value = item.get(key) 74 | 75 | if value not in seen: 76 | seen.append(value) 77 | yield item 78 | 79 | 80 | @operator(DEFAULTS, isasync=True, **OPTS) 81 | def async_pipe(*args, **kwargs): 82 | """An operator that asynchronously filters out non unique items according 83 | to a specified field. 84 | 85 | Args: 86 | items (Iter[dict]): The source. 87 | kwargs (dict): The keyword arguments passed to the wrapper 88 | 89 | Kwargs: 90 | conf (dict): The pipe configuration. May contain the keys 'uniq_key' or 91 | 'limit'. 
            uniq_key (str): Item attribute which should be unique (default:
                'content').

            limit (int): Maximum number of unique items to track (default:
                1024)

    Returns:
        Deferred: twisted.internet.defer.Deferred stream

    Examples:
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>>
        >>> def run(reactor):
        ...     callback = lambda x: print([i['mod'] for i in x])
        ...     items = ({'x': x, 'mod': x % 2} for x in range(5))
        ...     d = async_pipe(items, conf={'uniq_key': 'mod'})
        ...     return d.addCallbacks(callback, logger.error)
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        [0, 1]
    """
    # Delegates to the synchronous parser; @operator handles the deferreds.
    return parser(*args, **kwargs)


@operator(DEFAULTS, **OPTS)
def pipe(*args, **kwargs):
    """An operator that filters out non unique items according to a specified
    field.

    Args:
        items (Iter[dict]): The source.
        kwargs (dict): The keyword arguments passed to the wrapper

    Kwargs:
        conf (dict): The pipe configuration. May contain the keys 'uniq_key' or
            'limit'.

            uniq_key (str): Item attribute which should be unique (default:
                'content').

            limit (int): Maximum number of unique items to track (default:
                1024)

    Yields:
        dict: an item

    Examples:
        >>> items = [{'content': x, 'mod': x % 2} for x in range(5)]
        >>> list(pipe(items, conf={'uniq_key': 'mod'})) == [
        ...     {'mod': 0, 'content': 0}, {'mod': 1, 'content': 1}]
        True
        >>> stream = pipe(items)
        >>> next(stream) == {'mod': 0, 'content': 0}
        True
        >>> [item['content'] for item in stream]
        [1, 2, 3, 4]
    """
    return parser(*args, **kwargs)
--------------------------------------------------------------------------------
/riko/modules/urlparse.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# vim: sw=4:ts=4:expandtab
"""
riko.modules.urlparse
~~~~~~~~~~~~~~~~~~~~~
Provides functions for parsing a URL into its six components.

Examples:
    basic usage::

        >>> from riko.modules.urlparse import pipe
        >>>
        >>> item = {'content': 'http://yahoo.com'}
        >>> scheme = {'component': 'scheme', 'content': 'http'}
        >>> next(pipe(item))['urlparse'][0] == scheme
        True

Attributes:
    OPTS (dict): The default pipe options
    DEFAULTS (dict): The default parser options
"""
import pygogo as gogo

from urllib.parse import urlparse
from . 
import processor 26 | 27 | OPTS = {"ftype": "text", "field": "content"} 28 | DEFAULTS = {"parse_key": "content"} 29 | logger = gogo.Gogo(__name__, monolog=True).logger 30 | 31 | 32 | def parser(url, objconf, skip=False, **kwargs): 33 | """Parsers the pipe content 34 | 35 | Args: 36 | url (str): The link to parse 37 | objconf (obj): The pipe configuration (an Objectify instance) 38 | skip (bool): Don't parse the content 39 | kwargs (dict): Keyword arguments 40 | 41 | Kwargs: 42 | assign (str): Attribute to assign parsed content (default: urlparse) 43 | stream (dict): The original item 44 | 45 | Returns: 46 | dict: The item 47 | 48 | Examples: 49 | >>> from meza.fntools import Objectify 50 | >>> 51 | >>> objconf = Objectify({'parse_key': 'value'}) 52 | >>> result = parser('http://yahoo.com', objconf) 53 | >>> next(result) == {'component': 'scheme', 'value': 'http'} 54 | True 55 | """ 56 | if skip: 57 | stream = kwargs["stream"] 58 | else: 59 | parsed = urlparse(url) 60 | 61 | # noqa pylint: disable=dict-items-not-iterating 62 | items = parsed._asdict().items() 63 | stream = ({"component": k, objconf.parse_key: v} for k, v in items) 64 | 65 | return stream 66 | 67 | 68 | @processor(DEFAULTS, isasync=True, **OPTS) 69 | def async_pipe(*args, **kwargs): 70 | """A processor module that asynchronously parses a URL into its components. 71 | 72 | Args: 73 | item (dict): The entry to process 74 | kwargs (dict): The keyword arguments passed to the wrapper 75 | 76 | Kwargs: 77 | assign (str): Attribute to assign parsed content (default: urlparse) 78 | field (str): Item attribute to operate on (default: 'content') 79 | 80 | Returns: 81 | Deferred: twisted.internet.defer.Deferred item with parsed content 82 | 83 | Examples: 84 | >>> from riko.bado import react 85 | >>> from riko.bado.mock import FakeReactor 86 | >>> 87 | >>> scheme = {'component': 'scheme', 'content': 'http'} 88 | >>> 89 | >>> def run(reactor): 90 | ... 
callback = lambda x: print(next(x)['urlparse'][0] == scheme)
        ...     d = async_pipe({'content': 'http://yahoo.com'})
        ...     return d.addCallbacks(callback, logger.error)
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        True
    """
    # Delegates to the synchronous parser; @processor handles the deferreds.
    return parser(*args, **kwargs)


@processor(DEFAULTS, **OPTS)
def pipe(*args, **kwargs):
    """A processor that parses a URL into its components.

    Args:
        item (dict): The entry to process
        kwargs (dict): The keyword arguments passed to the wrapper

    Kwargs:
        conf (dict): The pipe configuration. May contain the key 'parse_key'.

            parse_key (str): Attribute to assign individual tokens (default:
                content)

        assign (str): Attribute to assign parsed content (default: urlparse)
        field (str): Item attribute to operate on (default: 'content')

    Yields:
        dict: an item with parsed content

    Examples:
        >>> item = {'content': 'http://yahoo.com'}
        >>> scheme = {'component': 'scheme', 'content': 'http'}
        >>> next(pipe(item))['urlparse'][0] == scheme
        True
        >>> conf = {'parse_key': 'value'}
        >>> next(pipe(item, conf=conf, emit=True)) == {
        ...     'component': 'scheme', 'value': 'http'}
        True
    """
    return parser(*args, **kwargs)
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
[wheel]
universal=1

[nosetests]
verbosity=1
# detailed-errors=1
cover-package=riko
with-doctest=1

[unittest]
plugins=doctest
process-restartworker=1

[doctest]
always-on=True

[flake8]
ignore=W191,E203,E126,E128,E122,E731,F403,F401,BLK100
max-complexity=10
max-line-length=88
exclude=./build/*,./dist/*,./.tox/*,./app/lib/.ropeproject/*,./eggs/*

[pylint]
max-line-length=88

[pylint.messages_control]
disable=C0330,C0326
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import sys

from os import path as p
from setuptools import setup, find_packages

import pkutils

PARENT_DIR = p.abspath(p.dirname(__file__))

sys.dont_write_bytecode = True
# Requirement lists are parsed from the pinned requirements files so pip and
# setup.py stay in sync.
requirements = list(pkutils.parse_requirements("requirements.txt"))
dev_requirements = list(pkutils.parse_requirements("dev-requirements.txt"))
optional = "optional-requirements.txt"
opt_requirements = set(pkutils.parse_requirements(optional))
readme = pkutils.read("README.rst")
# Package metadata (version, title, author, ...) lives in riko/__init__.py
module = pkutils.parse_module(p.join(PARENT_DIR, "riko", "__init__.py"))
license = module.__license__
version = module.__version__
project = module.__title__
description = module.__description__
user = "nerevu"

# Setup requirements
setup_require = [r for r in dev_requirements if "pkutils" in r]

# Optional requirements
xml_require = [r for r in opt_requirements 
if not r.lower().startswith("t")]
# Whatever optional requirements aren't XML-related (i.e. the Twisted stack)
# make up the 'async' extra.
async_require = list(opt_requirements.difference(xml_require))

setup(
    name=project,
    version=version,
    description=description,
    long_description=readme,
    author=module.__author__,
    author_email=module.__email__,
    url=pkutils.get_url(project, user),
    download_url=pkutils.get_dl_url(project, user, version),
    packages=find_packages(exclude=["tests"]),
    include_package_data=True,
    package_data={
        "data": ["data/*"],
        "helpers": ["helpers/*"],
        "tests": ["tests/*"],
        "docs": ["docs/*"],
        "examples": ["examples/*"],
    },
    install_requires=requirements,
    # pip install riko[xml] / riko[async] / riko[develop]
    extras_require={
        "xml": xml_require,
        "async": async_require,
        "develop": dev_requirements,
    },
    setup_requires=setup_require,
    test_suite="nose.collector",
    tests_require=dev_requirements,
    license=license,
    zip_safe=False,
    keywords=[project] + description.split(" "),
    classifiers=[
        pkutils.get_license(license),
        pkutils.get_status(version),
        "Natural Language :: English",
        "Programming Language :: Python",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3 :: Only",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: Implementation :: CPython",
        "Programming Language :: Python :: Implementation :: PyPy",
        "Environment :: Console",
        "Topic :: Software Development :: Libraries :: Python Modules",
        "Intended Audience :: Developers",
        "Operating System :: POSIX :: Linux",
        "Operating System :: MacOS :: MacOS X",
        "Operating System :: Microsoft :: Windows",
    ],
    platforms=["MacOS X", "Windows", "Linux"],
    scripts=[p.join("bin", "runpipe")],
)
--------------------------------------------------------------------------------
/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # vim: sw=4:ts=4:expandtab 3 | """ 4 | tests 5 | ~~~~~ 6 | 7 | Provides application unit tests 8 | """ 9 | 10 | 11 | def setup_package(): 12 | """database context creation""" 13 | global initialized 14 | initialized = True 15 | print("Test Package Setup\n") 16 | 17 | 18 | def teardown_package(): 19 | """database context removal""" 20 | global initialized 21 | initialized = False 22 | print("Test Package Teardown\n") 23 | -------------------------------------------------------------------------------- /tests/test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # vim: sw=4:ts=4:expandtab 4 | 5 | """ 6 | tests.test 7 | ~~~~~~~~~~ 8 | 9 | Provides scripttests to test riko runpipe CLI functionality. 10 | """ 11 | 12 | import sys 13 | import pygogo as gogo 14 | 15 | from difflib import unified_diff 16 | from os import path as p 17 | from io import StringIO, open 18 | from timeit import default_timer as timer 19 | 20 | from scripttest import TestFileEnvironment 21 | 22 | sys.path.append("../riko") 23 | 24 | try: 25 | from riko.bado import _isasync 26 | except ImportError: 27 | _isasync = False 28 | 29 | PARENT_DIR = p.abspath(p.dirname(p.dirname(__file__))) 30 | 31 | 32 | def main(script, tests, verbose=False, stop=True): 33 | """Main method 34 | 35 | Returns 0 on success, 1 on failure 36 | """ 37 | failures = 0 38 | logger = gogo.Gogo(__name__, verbose=verbose).logger 39 | short_script = p.basename(script) 40 | env = TestFileEnvironment(".scripttest") 41 | 42 | start = timer() 43 | 44 | for pos, test in enumerate(tests): 45 | num = pos + 1 46 | opts, arguments, expected = test 47 | joined_opts = " ".join(opts) if opts else "" 48 | joined_args = '"%s"' % '" "'.join(arguments) if arguments else "" 49 | command = "%s %s %s" % (script, joined_opts, 
joined_args) 50 | short_command = "%s %s %s" % (short_script, joined_opts, joined_args) 51 | result = env.run(command, cwd=PARENT_DIR, expect_stderr=True) 52 | output = result.stdout 53 | 54 | if isinstance(expected, bool): 55 | text = StringIO(output).read() 56 | outlines = [str(bool(text))] 57 | checklines = StringIO(str(expected)).readlines() 58 | elif p.isfile(expected): 59 | outlines = StringIO(output).readlines() 60 | 61 | with open(expected, encoding="utf-8") as f: 62 | checklines = f.readlines() 63 | else: 64 | outlines = StringIO(output).readlines() 65 | checklines = StringIO(expected).readlines() 66 | 67 | args = [checklines, outlines] 68 | kwargs = {"fromfile": "expected", "tofile": "got"} 69 | diffs = "".join(unified_diff(*args, **kwargs)) 70 | 71 | if diffs: 72 | failures += 1 73 | msg = "ERROR! Output from test #%i:\n %s\n" % (num, short_command) 74 | msg += "doesn't match:\n %s\n" % expected 75 | msg += diffs if diffs else "" 76 | else: 77 | logger.debug(output) 78 | msg = "Scripttest #%i: %s ... 
ok" % (num, short_command) 79 | 80 | logger.info(msg) 81 | 82 | if stop and failures: 83 | break 84 | 85 | time = timer() - start 86 | logger.info("%s" % "-" * 70) 87 | end = "FAILED (failures=%i)" % failures if failures else "OK" 88 | logger.info("Ran %i scripttests in %0.3fs\n\n%s" % (num, time, end)) 89 | sys.exit(failures) 90 | 91 | 92 | if __name__ == "__main__": 93 | demo = p.join(PARENT_DIR, "bin", "runpipe") 94 | benchmark = p.join(PARENT_DIR, "bin", "benchmark") 95 | text = "Deadline to clear up health law eligibility near 682\n" 96 | runpipe_tests = [([], ["demo"], text), ([], ["simple1"], "'farechart'\n")] 97 | 98 | if _isasync: 99 | runpipe_tests += [ 100 | (["-a"], ["demo"], text), 101 | (["-a"], ["simple1"], "'farechart'\n"), 102 | ] 103 | 104 | main(demo, runpipe_tests) 105 | main(benchmark, [([], [], "")]) 106 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | toxworkdir={homedir}/.tox/venvcache/riko 3 | envlist=py{37,38,39,py3}-{test,style,optional} 4 | 5 | [testenv] 6 | distribute=False 7 | whitelist_externals = 8 | {toxinidir}/helpers/pippy 9 | manage 10 | 11 | install_command={toxinidir}/helpers/pippy {packages} 12 | setenv = 13 | PYTHONHASHSEED=94967295 14 | PYTHONWARNINGS=all 15 | PIP_CACHE_DIR={homedir}/.pip/packages 16 | PIP_WHEEL_DIR={homedir}/.pip/wheels 17 | PIP_FIND_LINKS=file://{homedir}/.pip/wheels 18 | 19 | commands = 20 | pypy3-{optional,style}: true 21 | optional: manage test 22 | test: manage test 23 | style: manage lint 24 | 25 | deps = 26 | -r{toxinidir}/dev-requirements.txt 27 | -r{toxinidir}/requirements.txt 28 | py{37,38,39}-{optional}: -r{toxinidir}/optional-requirements.txt 29 | 30 | [testenv:pypy3] 31 | basepython==pypy3.7-7.3.2 32 | --------------------------------------------------------------------------------